diff --git a/MG5aMC/mg5amcnlo b/MG5aMC/mg5amcnlo
index 49c93e01b8..d8c1613ccf 160000
--- a/MG5aMC/mg5amcnlo
+++ b/MG5aMC/mg5amcnlo
@@ -1 +1 @@
-Subproject commit 49c93e01b8596cbdb4e65f628601de1e6f08c744
+Subproject commit d8c1613ccf638b5b078a64379e385def5649622c
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py
index a0cd9dbfb3..82661c6c66 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py
@@ -55,6 +55,6 @@
 __author__ = 'Andrea Valassi'
 __email__ = 'andrea.valassi@cern.ch'
 __version__ = (1,0,0)
-minimal_mg5amcnlo_version = (3,5,1)
+minimal_mg5amcnlo_version = (3,5,2)
 maximal_mg5amcnlo_version = (1000,1000,1000)
-latest_validated_version = (3,5,1)
+latest_validated_version = (3,5,2)
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
index c9d1c7706a..0b849330ef 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/launch_plugin.py
@@ -57,7 +57,7 @@ def reset_simd(self, old_value, new_value, name):
             return
         Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source')
         subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-
+
     def plugin_input(self, finput):
         return

@@ -79,7 +79,7 @@ def check_validity(self):
             self['sde_strategy'] = 1
         if self['hel_recycling']:
             self['hel_recycling'] = False
-
+
 class GPURunCard(CPPRunCard):
     def default_setup(self):
         super(CPPRunCard, self).default_setup()
diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py
index 8961036fb1..5b557e832a 100644
--- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py
+++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py
@@ -200,9 +200,9 @@ def convert_model(self, model, wanted_lorentz=[], wanted_coupling=[]):
     # AV (default from OM's tutorial) - add a debug printout
     def finalize(self, matrix_element, cmdhistory, MG5options, outputflag):
         """Typically creating jpeg/HTML output/ compilation/...
-        cmdhistory is the list of command used so far.
-        MG5options are all the options of the main interface
-        outputflags is a list of options provided when doing the output command"""
+        cmdhistory is the list of command used so far.
+        MG5options are all the options of the main interface
+        outputflags is a list of options provided when doing the output command"""
         misc.sprint('Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self))
         if self.in_madevent_mode:
             self.add_input_for_banner()
@@ -214,7 +214,7 @@ def finalize(self, matrix_element, cmdhistory, MG5options, outputflag):
         #if os.system(path + os.sep + 'patchMad.sh ' + self.dir_path + ' PROD ' + patchlevel) != 0:
         # logger.debug("####### \n stdout is \n %s", stdout)
         # logger.info("####### \n stderr is \n %s", stderr)
-        # raise Exception('ERROR! 
the O/S call to patchMad.sh failed') # OLD implementation (SH PR #762) #if os.system(PLUGINDIR + os.sep + 'patchMad.sh ' + self.dir_path + ' PROD ' + patchlevel) != 0: # logger.debug("####### \n stdout is \n %s", stdout) @@ -267,7 +267,7 @@ def add_madevent_plugin_fct(self): which contains a series of functions and one dictionary variable TO_OVERWRITE that will be used to have temporary overwrite of all the key variable passed as string by their value. all variable that are file related should be called as madgraph.dir.file.variable - """ + """ plugin_path = os.path.dirname(os.path.realpath( __file__ )) files.cp(pjoin(plugin_path, 'launch_plugin.py'), pjoin(self.dir_path, 'bin', 'internal')) files.ln(pjoin(self.dir_path, 'lib'), pjoin(self.dir_path, 'SubProcesses')) @@ -283,10 +283,10 @@ def change_output_args(args, cmd): if 'vector_size' not in ''.join(args): args.append('--vector_size=16') return args - + #------------------------------------------------------------------------------------ -class GPU_ProcessExporter(PLUGIN_ProcessExporter): +class GPU_ProcessExporter(PLUGIN_ProcessExporter): def change_output_args(args, cmd): """ """ cmd._export_format = "madevent" @@ -295,7 +295,7 @@ def change_output_args(args, cmd): if 'vector_size' not in ''.join(args): args.append('--vector_size=16384') return args - + def finalize(self, matrix_element, cmdhistory, MG5options, outputflag): misc.sprint("enter dedicated function") out = super().finalize(matrix_element, cmdhistory, MG5options, outputflag) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 16a5e3cdc9..e6546f684c 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005366802215576172  +DEBUG: model prefixing takes 0.005372047424316406  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,7 +154,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.005 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output madevent ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -174,7 +174,7 @@ INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 INFO: Creating files in directory P1_epem_mupmum DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -183,27 +183,27 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  WARNING: vector code for lepton pdf not implemented. We removed the option to run dressed lepton  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.102 s +Wrote files for 8 helas calls in 0.098 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.201 s +ALOHA: aloha creates 3 routines in 0.200 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.258 s +ALOHA: aloha creates 7 routines in 0.255 s FFV1 FFV1 FFV2 @@ -232,6 +232,7 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f @@ -248,9 +249,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m1.973s -user 0m1.681s -sys 0m0.231s +real 0m4.853s +user 0m1.653s +sys 0m0.201s ************************************************************ * * * W E L C O M E to * @@ -263,7 +264,7 @@ sys 0m0.231s * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -297,7 +298,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat index b9e01f684b..618adbca06 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.1_lo_vect 2023-08-08 * +#* VERSION 3.5.2_lo_vect 2023-11-08 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/ee_mumu.mad/MGMEVersion.txt b/epochX/cudacpp/ee_mumu.mad/MGMEVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/ee_mumu.mad/MGMEVersion.txt +++ b/epochX/cudacpp/ee_mumu.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MGVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc index 0af629d3a8..fc293da1de 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h index f2ef5c1e14..77b610753c 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f index f78f7c102e..02520466e6 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f index fcf2e4dec5..4188745070 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -225,7 +225,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f index 21e300b33e..1991a72bb9 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -319,7 +319,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/genps.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/genps.f index fe9c61504b..c00e33d954 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/genps.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/genps.f @@ -1877,12 +1877,12 @@ double precision function get_channel_cut(p, config) d1 = iforest(1, -i, config) d2 = iforest(2, -i, config) do j=0,3 - if (d1.gt.0.and.d1.le.2) then + if (d1.gt.0.and.d1.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d1) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d1) endif - if (d2.gt.0.and.d2.le.2) then + if (d2.gt.0.and.d2.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d2) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d2) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/__init__.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/__init__.py index 16e60d8182..0d17042f0d 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/__init__.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/__init__.py @@ -26,6 +26,7 @@ class aMCatNLOError(MadGraph5Error): import os import logging import time +pjoin = os.path.join #Look for basic file position MG5DIR and MG4DIR MG5DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py index ef1bf58979..3995ce8109 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/banner.py @@ -2704,7 +2704,8 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - + elif issubclass(finput, RunCard): + target_class = finput else: return None @@ -2968,11 +2969,12 @@ def write(self, output_file, template=None, python_template=False, if python_template and not to_write: import string if self.blocks: - text = string.Template(text) mapping = {} for b in self.blocks: mapping[b.name] = b.get_template(self) - text = text.substitute(mapping) + if "$%s" % b.name not in text: + text += "\n$%s\n" % b.name + text = string.Template(text).substitute(mapping) if not self.list_parameter: text = text % self @@ -4875,6 +4877,9 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): continue break + if proc_characteristic['ninitial'] == 1: + self['SDE_strategy'] =1 + if 'MLM' in proc_characteristic['limitations']: if self['dynamical_scale_choice'] == -1: self['dynamical_scale_choice'] = 3 @@ -5940,7 +5945,7 @@ def default_setup(self): self.add_param("CheckCycle", 3) self.add_param("MaxAttempts", 10) self.add_param("ZeroThres", 1e-9) - self.add_param("OSThres", 1.0e-13) + self.add_param("OSThres", 1.0e-8) self.add_param("DoubleCheckHelicityFilter", True) self.add_param("WriteOutFilters", True) self.add_param("UseLoopFilter", False) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py index 14c7f310dc..87cb4b88df 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py @@ -4906,6 +4906,7 @@ def __init__(self, question, cards=[], from_banner=None, banner=None, mode='auto self.load_default() self.define_paths(**opt) self.last_editline_pos = 0 + self.update_dependent_done = False if 'allow_arg' not in opt or not opt['allow_arg']: # add some mininal 
content for this: @@ -6585,7 +6586,9 @@ def postcmd(self, stop, line): self.check_card_consistency() if self.param_consistency: try: - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) + self.update_dependent_done = False except MadGraph5Error as error: if 'Missing block:' in str(error): self.fail_due_to_format +=1 @@ -6638,6 +6641,8 @@ def do_update(self, line, timer=0): self.update_dependent(self.mother_interface, self.me_dir, self.param_card, self.paths['param'], timer, run_card=self.run_card, lhapdfconfig=self.lhapdf) + self.update_dependent_done = True + elif args[0] == 'missing': self.update_missing() @@ -6717,12 +6722,13 @@ class TimeOutError(Exception): def handle_alarm(signum, frame): raise TimeOutError signal.signal(signal.SIGALRM, handle_alarm) + if timer: - signal.alarm(timer) log_level=30 else: log_level=20 + if run_card: as_for_pdf = {'cteq6_m': 0.118, 'cteq6_d': 0.118, @@ -6781,6 +6787,10 @@ def handle_alarm(signum, frame): logger.log(log_level, "update the strong coupling value (alpha_s) to the value from the pdf selected: %s", as_for_pdf[pdlabel]) modify = True + if timer: + signal.alarm(timer) + + # Try to load the model in the limited amount of time allowed try: model = mecmd.get_model() @@ -6909,7 +6919,8 @@ def check_block(self, blockname): def check_answer_consistency(self): """function called if the code reads a file""" self.check_card_consistency() - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) def help_set(self): '''help message for set''' diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py index a88d60b282..5fd170d18d 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py @@ -157,12 +157,16 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - try: + if stdout: nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - except Exception as error: - misc.sprint(stdout, 'no channel or error for %s' % Pdir) - continue - + else: + for matrix_file in misc.glob('matrix*orig.f', Pdir): + files.cp(matrix_file, matrix_file.replace('orig','optim')) + P_zero_result.append(Pdir) + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) + continue # bypass bad process + self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') @@ -183,11 +187,13 @@ def get_helicity(self, to_submit=True, clean=True): #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts (stdout, _) = p.communicate(" ".encode()) stdout = stdout.decode('ascii',errors='ignore') - if os.path.exists(pjoin(self.me_dir,'error')): + if os.path.exists(pjoin(self.me_dir, 'error')): raise Exception(pjoin(self.me_dir,'error')) # note a continue is not enough here, we have in top to link # the matrixX_optim.f to matrixX_orig.f to let the code to work # after this error. 
+ # for matrix_file in misc.glob('matrix*orig.f', Pdir): + # files.cp(matrix_file, matrix_file.replace('orig','optim')) if 'no events passed cuts' in stdout: raise Exception diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py index c9d1c7706a..0b849330ef 100644 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py @@ -57,7 +57,7 @@ def reset_simd(self, old_value, new_value, name): return Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - + def plugin_input(self, finput): return @@ -79,7 +79,7 @@ def check_validity(self): self['sde_strategy'] = 1 if self['hel_recycling']: self['hel_recycling'] = False - + class GPURunCard(CPPRunCard): def default_setup(self): super(CPPRunCard, self).default_setup() diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py index d722702891..853aabc98a 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py @@ -1,4 +1,4 @@ -################################################################################ +############################################################################### # # Copyright (c) 2011 The MadGraph5_aMC@NLO Development team and Contributors # @@ -3675,7 +3675,7 @@ def do_refine(self, line): devnull.close() ############################################################################ - def do_combine_iteration(self, line): + def do_comine_iteration(self, line): """Not in help: Combine a given iteration combine_iteration Pdir Gdir S|R step S is for survey R is for refine @@ -3703,7 +3703,7 @@ def do_combine_iteration(self, line): ############################################################################ def do_combine_events(self, line): """Advanced commands: Launch combine events""" - + start=time.time() args = self.split_arg(line) start = time.time() # Check argument's validity @@ -3798,9 +3798,7 @@ def do_combine_events(self, line): self.correct_bias() elif self.run_card['custom_fcts']: self.correct_bias() - - logger.info("combine events done in %s", time.time()-start) - + logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') @@ -7368,7 +7366,7 @@ def wait_monitoring(Idle, Running, Done): import optparse # Get the directory of the script real path (bin) # and add it to the current PYTHONPATH - root_path = os.path.dirname(os.path.dirname(os.path.realpath( __file__ ))) + #root_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath( __file__ )))) sys.path.insert(0, root_path) class MyOptParser(optparse.OptionParser): @@ -7411,7 +7409,13 @@ def error(self, msg=''): import logging.config # Set logging level according to the logging level given by options #logging.basicConfig(level=vars(logging)[options.logging]) + import internal import internal.coloring_logging + # internal.file = XXX/bin/internal/__init__.py + # => need three dirname to get XXX + # we use internal to have any issue with pythonpath finding the wrong file + me_dir = os.path.dirname(os.path.dirname(os.path.dirname(internal.__file__))) + print("me_dir is", me_dir) try: if __debug__ and options.logging == 'INFO': options.logging = 'DEBUG' @@ -7419,7 +7423,8 @@ def error(self, 
msg=''): level = int(options.logging) else: level = eval('logging.' + options.logging) - logging.config.fileConfig(os.path.join(root_path, 'internal', 'me5_logging.conf')) + log_path = os.path.join(me_dir, 'bin', 'internal', 'me5_logging.conf') + logging.config.fileConfig(log_path) logging.root.setLevel(level) logging.getLogger('madgraph').setLevel(level) except: @@ -7433,9 +7438,9 @@ def error(self, msg=''): if '--web' in args: i = args.index('--web') args.pop(i) - cmd_line = MadEventCmd(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmd(me_dir, force_run=True) else: - cmd_line = MadEventCmdShell(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmdShell(me_dir, force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print(parser_error) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/misc.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/misc.py index d3fed3baa2..91cd3e5c22 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/misc.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/misc.py @@ -347,7 +347,7 @@ def tell(msg): if dependency=='ninja': if cmd.options['ninja'] in ['None',None,''] or\ (cmd.options['ninja'] == './HEPTools/lib' and not MG5dir is None and\ - which_lib(pjoin(MG5dir,cmd.options['ninja'],'libninja.a')) is None): + which_lib(pjoin(MG5dir,cmd.options['ninja'],'lib','libninja.a')) is None): tell("Installing ninja...") cmd.do_install('ninja') diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/shower_card.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/shower_card.py index c6d3948cc4..c344ea1b15 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/shower_card.py @@ -45,7 +45,9 @@ class ShowerCard(dict): false = ['.false.', 'f', 'false', '0'] logical_vars = ['ue_enabled', 'hadronize', 'b_stable', 'pi_stable', 'wp_stable', 'wm_stable', 'z_stable', 'h_stable', 'tap_stable', 'tam_stable', - 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td'] + 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td', + 'space_shower_me_corrections', 'time_shower_me_corrections', + 'time_shower_me_extended', 'time_shower_me_after_first'] string_vars = ['extralibs', 'extrapaths', 'includepaths', 'analyse'] for i in range(1,100): string_vars.append('dm_'+str(i)) @@ -82,7 +84,11 @@ class ShowerCard(dict): 'b_mass' : {'HERWIG6':'b_mass', 'PYTHIA6': 'b_mass', 'HERWIGPP': 'b_mass', 'PYTHIA8': 'b_mass'}, 'analyse' : {'HERWIG6':'hwuti', 'PYTHIA6':'pyuti', 'HERWIGPP':'hwpputi', 'PYTHIA8':'py8uti'}, 'qcut' : {'PYTHIA8':'qcut'}, - 'njmax' : {'PYTHIA8':'njmax'}} + 'njmax' : {'PYTHIA8':'njmax'}, + 'space_shower_me_corrections' : {'PYTHIA8':'space_shower_me_corrections'}, + 'time_shower_me_corrections' : {'PYTHIA8':'time_shower_me_corrections'}, + 'time_shower_me_extended' : {'PYTHIA8':'time_shower_me_extended'}, + 'time_shower_me_after_first' : {'PYTHIA8':'time_shower_me_after_first'}} stdhep_dict = {'HERWIG6':'mcatnlo_hwan_stdhep.o', 'PYTHIA6':'mcatnlo_pyan_stdhep.o'} diff --git a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h index 19819e2451..9fa30cfd7f 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 
3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/ee_mumu.mad/src/Parameters_sm.cc b/epochX/cudacpp/ee_mumu.mad/src/Parameters_sm.cc index 31f620c44e..0b4be4d5ed 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/ee_mumu.mad/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/ee_mumu.mad/src/Parameters_sm.h b/epochX/cudacpp/ee_mumu.mad/src/Parameters_sm.h index 521831ce4a..64d0b8e761 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/ee_mumu.mad/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index d48a5c4d44..8cb80f0d38 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005533456802368164  +DEBUG: model prefixing takes 0.005633831024169922  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,14 +174,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
-Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.271 s +ALOHA: aloha creates 4 routines in 0.267 s FFV1 FFV1 FFV2 @@ -201,6 +201,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.669s -user 0m0.609s -sys 0m0.053s +real 0m3.653s +user 0m0.601s +sys 0m0.049s diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc index c0ab4edb92..684bd53bf5 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.h b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.h index f2ef5c1e14..77b610753c 100644 --- a/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.h +++ b/epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h index 19819e2451..9fa30cfd7f 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/ee_mumu.sa/src/Parameters_sm.cc b/epochX/cudacpp/ee_mumu.sa/src/Parameters_sm.cc index 31f620c44e..0b4be4d5ed 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/Parameters_sm.cc +++ b/epochX/cudacpp/ee_mumu.sa/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/ee_mumu.sa/src/Parameters_sm.h b/epochX/cudacpp/ee_mumu.sa/src/Parameters_sm.h index 521831ce4a..64d0b8e761 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/Parameters_sm.h +++ b/epochX/cudacpp/ee_mumu.sa/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 2460cf072a..a1fa47508f 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005415439605712891  +DEBUG: model prefixing takes 0.005694150924682617  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -184,23 +184,23 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.106 s +Wrote files for 10 helas calls in 0.101 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.148 s +ALOHA: aloha creates 2 routines in 0.145 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.134 s +ALOHA: aloha creates 4 routines in 0.132 s VVV1 FFV1 FFV1 @@ -225,6 +225,7 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f @@ -237,9 +238,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.735s -user 0m1.507s -sys 0m0.213s +real 0m4.772s +user 0m1.470s +sys 0m0.223s ************************************************************ * * * W E L C O M E to * @@ -252,7 +253,7 @@ sys 0m0.213s * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -285,7 +286,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat index 2a2fd25453..4c14989a3f 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.1_lo_vect 2023-08-08 * +#* VERSION 3.5.2_lo_vect 2023-11-08 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. 
* #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gg_tt.mad/MGMEVersion.txt b/epochX/cudacpp/gg_tt.mad/MGMEVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gg_tt.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gg_tt.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gg_tt.mad/SubProcesses/MGVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 02f655f48c..d2e7a3c91d 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h index 0c2d2b0687..3ebd92c038 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index fe184caddf..d80d770784 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 5a3da931f2..9346ee4c6a 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -216,7 +216,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index daea73a6df..0c2ce6ec40 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -301,7 +301,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/genps.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/genps.f index fe9c61504b..c00e33d954 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/genps.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/genps.f @@ -1877,12 +1877,12 @@ double precision function get_channel_cut(p, config) d1 = iforest(1, -i, config) d2 = iforest(2, -i, config) do j=0,3 - if (d1.gt.0.and.d1.le.2) then + if (d1.gt.0.and.d1.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d1) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d1) endif - if (d2.gt.0.and.d2.le.2) then + if (d2.gt.0.and.d2.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d2) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d2) diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/__init__.py b/epochX/cudacpp/gg_tt.mad/bin/internal/__init__.py index 16e60d8182..0d17042f0d 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/__init__.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/__init__.py @@ -26,6 +26,7 @@ class aMCatNLOError(MadGraph5Error): import os import logging import time +pjoin = os.path.join #Look for basic file position MG5DIR and MG4DIR MG5DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py index ef1bf58979..3995ce8109 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py @@ -2704,7 +2704,8 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - + elif issubclass(finput, RunCard): + target_class = finput else: return None @@ -2968,11 +2969,12 @@ def write(self, output_file, template=None, python_template=False, if python_template and not to_write: import string if self.blocks: - text = string.Template(text) mapping = {} for b in self.blocks: mapping[b.name] = b.get_template(self) - text = text.substitute(mapping) + if "$%s" % b.name not in text: + text += "\n$%s\n" % b.name + text = string.Template(text).substitute(mapping) if not self.list_parameter: text = text % self @@ -4875,6 +4877,9 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): continue break + if proc_characteristic['ninitial'] == 1: + self['SDE_strategy'] =1 + if 'MLM' in proc_characteristic['limitations']: if self['dynamical_scale_choice'] == -1: self['dynamical_scale_choice'] = 3 @@ -5940,7 +5945,7 @@ def default_setup(self): self.add_param("CheckCycle", 3) self.add_param("MaxAttempts", 10) self.add_param("ZeroThres", 1e-9) - self.add_param("OSThres", 1.0e-13) + self.add_param("OSThres", 1.0e-8) self.add_param("DoubleCheckHelicityFilter", True) self.add_param("WriteOutFilters", True) self.add_param("UseLoopFilter", False) diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py index 14c7f310dc..87cb4b88df 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/common_run_interface.py @@ -4906,6 +4906,7 @@ def __init__(self, question, cards=[], from_banner=None, banner=None, mode='auto self.load_default() self.define_paths(**opt) self.last_editline_pos = 0 + self.update_dependent_done = False if 'allow_arg' not in opt or not opt['allow_arg']: # add some mininal content for this: @@ -6585,7 +6586,9 
@@ def postcmd(self, stop, line): self.check_card_consistency() if self.param_consistency: try: - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) + self.update_dependent_done = False except MadGraph5Error as error: if 'Missing block:' in str(error): self.fail_due_to_format +=1 @@ -6638,6 +6641,8 @@ def do_update(self, line, timer=0): self.update_dependent(self.mother_interface, self.me_dir, self.param_card, self.paths['param'], timer, run_card=self.run_card, lhapdfconfig=self.lhapdf) + self.update_dependent_done = True + elif args[0] == 'missing': self.update_missing() @@ -6717,12 +6722,13 @@ class TimeOutError(Exception): def handle_alarm(signum, frame): raise TimeOutError signal.signal(signal.SIGALRM, handle_alarm) + if timer: - signal.alarm(timer) log_level=30 else: log_level=20 + if run_card: as_for_pdf = {'cteq6_m': 0.118, 'cteq6_d': 0.118, @@ -6781,6 +6787,10 @@ def handle_alarm(signum, frame): logger.log(log_level, "update the strong coupling value (alpha_s) to the value from the pdf selected: %s", as_for_pdf[pdlabel]) modify = True + if timer: + signal.alarm(timer) + + # Try to load the model in the limited amount of time allowed try: model = mecmd.get_model() @@ -6909,7 +6919,8 @@ def check_block(self, blockname): def check_answer_consistency(self): """function called if the code reads a file""" self.check_card_consistency() - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) def help_set(self): '''help message for set''' diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py index a88d60b282..5fd170d18d 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/gen_ximprove.py @@ -157,12 +157,16 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - try: + if stdout: nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - except Exception as error: - misc.sprint(stdout, 'no channel or error for %s' % Pdir) - continue - + else: + for matrix_file in misc.glob('matrix*orig.f', Pdir): + files.cp(matrix_file, matrix_file.replace('orig','optim')) + P_zero_result.append(Pdir) + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) + continue # bypass bad process + self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') @@ -183,11 +187,13 @@ def get_helicity(self, to_submit=True, clean=True): #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts (stdout, _) = p.communicate(" ".encode()) stdout = stdout.decode('ascii',errors='ignore') - if os.path.exists(pjoin(self.me_dir,'error')): + if os.path.exists(pjoin(self.me_dir, 'error')): raise Exception(pjoin(self.me_dir,'error')) # note a continue is not enough here, we have in top to link # the matrixX_optim.f to matrixX_orig.f to let the code to work # after this error. 
+ # for matrix_file in misc.glob('matrix*orig.f', Pdir): + # files.cp(matrix_file, matrix_file.replace('orig','optim')) if 'no events passed cuts' in stdout: raise Exception diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py index c9d1c7706a..0b849330ef 100644 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/launch_plugin.py @@ -57,7 +57,7 @@ def reset_simd(self, old_value, new_value, name): return Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - + def plugin_input(self, finput): return @@ -79,7 +79,7 @@ def check_validity(self): self['sde_strategy'] = 1 if self['hel_recycling']: self['hel_recycling'] = False - + class GPURunCard(CPPRunCard): def default_setup(self): super(CPPRunCard, self).default_setup() diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py index d722702891..853aabc98a 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/madevent_interface.py @@ -1,4 +1,4 @@ -################################################################################ +############################################################################### # # Copyright (c) 2011 The MadGraph5_aMC@NLO Development team and Contributors # @@ -3675,7 +3675,7 @@ def do_refine(self, line): devnull.close() ############################################################################ - def do_combine_iteration(self, line): + def do_comine_iteration(self, line): """Not in help: Combine a given iteration combine_iteration Pdir Gdir S|R step S is for survey R is for refine @@ -3703,7 +3703,7 @@ def do_combine_iteration(self, line): ############################################################################ def do_combine_events(self, line): """Advanced commands: Launch combine events""" - + start=time.time() args = self.split_arg(line) start = time.time() # Check argument's validity @@ -3798,9 +3798,7 @@ def do_combine_events(self, line): self.correct_bias() elif self.run_card['custom_fcts']: self.correct_bias() - - logger.info("combine events done in %s", time.time()-start) - + logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') @@ -7368,7 +7366,7 @@ def wait_monitoring(Idle, Running, Done): import optparse # Get the directory of the script real path (bin) # and add it to the current PYTHONPATH - root_path = os.path.dirname(os.path.dirname(os.path.realpath( __file__ ))) + #root_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath( __file__ )))) sys.path.insert(0, root_path) class MyOptParser(optparse.OptionParser): @@ -7411,7 +7409,13 @@ def error(self, msg=''): import logging.config # Set logging level according to the logging level given by options #logging.basicConfig(level=vars(logging)[options.logging]) + import internal import internal.coloring_logging + # internal.file = XXX/bin/internal/__init__.py + # => need three dirname to get XXX + # we use internal to have any issue with pythonpath finding the wrong file + me_dir = os.path.dirname(os.path.dirname(os.path.dirname(internal.__file__))) + print("me_dir is", me_dir) try: if __debug__ and options.logging == 'INFO': options.logging = 'DEBUG' @@ -7419,7 +7423,8 @@ def error(self, msg=''): level = 
int(options.logging) else: level = eval('logging.' + options.logging) - logging.config.fileConfig(os.path.join(root_path, 'internal', 'me5_logging.conf')) + log_path = os.path.join(me_dir, 'bin', 'internal', 'me5_logging.conf') + logging.config.fileConfig(log_path) logging.root.setLevel(level) logging.getLogger('madgraph').setLevel(level) except: @@ -7433,9 +7438,9 @@ def error(self, msg=''): if '--web' in args: i = args.index('--web') args.pop(i) - cmd_line = MadEventCmd(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmd(me_dir, force_run=True) else: - cmd_line = MadEventCmdShell(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmdShell(me_dir, force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print(parser_error) diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/misc.py b/epochX/cudacpp/gg_tt.mad/bin/internal/misc.py index d3fed3baa2..91cd3e5c22 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/misc.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/misc.py @@ -347,7 +347,7 @@ def tell(msg): if dependency=='ninja': if cmd.options['ninja'] in ['None',None,''] or\ (cmd.options['ninja'] == './HEPTools/lib' and not MG5dir is None and\ - which_lib(pjoin(MG5dir,cmd.options['ninja'],'libninja.a')) is None): + which_lib(pjoin(MG5dir,cmd.options['ninja'],'lib','libninja.a')) is None): tell("Installing ninja...") cmd.do_install('ninja') diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/shower_card.py b/epochX/cudacpp/gg_tt.mad/bin/internal/shower_card.py index c6d3948cc4..c344ea1b15 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/shower_card.py @@ -45,7 +45,9 @@ class ShowerCard(dict): false = ['.false.', 'f', 'false', '0'] logical_vars = ['ue_enabled', 'hadronize', 'b_stable', 'pi_stable', 'wp_stable', 'wm_stable', 'z_stable', 'h_stable', 'tap_stable', 'tam_stable', - 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td'] + 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td', + 'space_shower_me_corrections', 'time_shower_me_corrections', + 'time_shower_me_extended', 'time_shower_me_after_first'] string_vars = ['extralibs', 'extrapaths', 'includepaths', 'analyse'] for i in range(1,100): string_vars.append('dm_'+str(i)) @@ -82,7 +84,11 @@ class ShowerCard(dict): 'b_mass' : {'HERWIG6':'b_mass', 'PYTHIA6': 'b_mass', 'HERWIGPP': 'b_mass', 'PYTHIA8': 'b_mass'}, 'analyse' : {'HERWIG6':'hwuti', 'PYTHIA6':'pyuti', 'HERWIGPP':'hwpputi', 'PYTHIA8':'py8uti'}, 'qcut' : {'PYTHIA8':'qcut'}, - 'njmax' : {'PYTHIA8':'njmax'}} + 'njmax' : {'PYTHIA8':'njmax'}, + 'space_shower_me_corrections' : {'PYTHIA8':'space_shower_me_corrections'}, + 'time_shower_me_corrections' : {'PYTHIA8':'time_shower_me_corrections'}, + 'time_shower_me_extended' : {'PYTHIA8':'time_shower_me_extended'}, + 'time_shower_me_after_first' : {'PYTHIA8':'time_shower_me_after_first'}} stdhep_dict = {'HERWIG6':'mcatnlo_hwan_stdhep.o', 'PYTHIA6':'mcatnlo_pyan_stdhep.o'} diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h index 07d0bfa887..55f43bb43a 100644 --- a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.mad/src/Parameters_sm.cc b/epochX/cudacpp/gg_tt.mad/src/Parameters_sm.cc index 3452d1e8da..a9bc93ff98 100644 --- a/epochX/cudacpp/gg_tt.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/gg_tt.mad/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.mad/src/Parameters_sm.h b/epochX/cudacpp/gg_tt.mad/src/Parameters_sm.h index 4f6f322ed9..932f123fea 100644 --- a/epochX/cudacpp/gg_tt.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/gg_tt.mad/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index f9425b6b07..805df19bd9 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057506561279296875  +DEBUG: model prefixing takes 0.00567626953125  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
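Note on the shower_card.py hunk earlier in this diff (gg_tt.mad/bin/internal/shower_card.py): four PYTHIA8-only switches (space_shower_me_corrections, time_shower_me_corrections, time_shower_me_extended, time_shower_me_after_first) are added both to logical_vars and to the per-shower name map. The sketch below only illustrates how such Fortran-style logical entries can be normalised to Python booleans; the parse_logical helper and the TRUE_TOKENS list are assumptions, only the false-token list appears in the diff.

    # Hypothetical sketch, not the plugin's parser: normalise shower-card logicals.
    FALSE_TOKENS = {'.false.', 'f', 'false', '0'}   # as listed in ShowerCard.false
    TRUE_TOKENS = {'.true.', 't', 'true', '1'}      # assumed symmetric counterpart

    def parse_logical(value):
        """Map a Fortran-style logical string from the shower card to a bool."""
        token = value.strip().lower()
        if token in TRUE_TOKENS:
            return True
        if token in FALSE_TOKENS:
            return False
        raise ValueError('not a logical value: %r' % value)

    # The new PYTHIA8-only switches introduced by this patch, with example values.
    card = {'space_shower_me_corrections': 'T', 'time_shower_me_after_first': '.false.'}
    flags = {key: parse_logical(val) for key, val in card.items()}
    assert flags == {'space_shower_me_corrections': True, 'time_shower_me_after_first': False}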
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,7 +180,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.148 s +ALOHA: aloha creates 2 routines in 0.143 s VVV1 FFV1 FFV1 @@ -196,6 +196,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/s DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.684s -user 0m0.481s -sys 0m0.057s +real 0m3.529s +user 0m0.478s +sys 0m0.048s diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc index 141d1f24ac..0e44ef42c3 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.h b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.h index 0c2d2b0687..3ebd92c038 100644 --- a/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h index 07d0bfa887..55f43bb43a 100644 --- a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.sa/src/Parameters_sm.cc b/epochX/cudacpp/gg_tt.sa/src/Parameters_sm.cc index 3452d1e8da..a9bc93ff98 100644 --- a/epochX/cudacpp/gg_tt.sa/src/Parameters_sm.cc +++ b/epochX/cudacpp/gg_tt.sa/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.sa/src/Parameters_sm.h b/epochX/cudacpp/gg_tt.sa/src/Parameters_sm.h index 4f6f322ed9..932f123fea 100644 --- a/epochX/cudacpp/gg_tt.sa/src/Parameters_sm.h +++ b/epochX/cudacpp/gg_tt.sa/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 2db08eff10..9d4dbd85f0 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005487680435180664  +DEBUG: model prefixing takes 0.005400419235229492  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -163,7 +163,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.020 s +1 processes with 16 diagrams generated in 0.019 s Total: 2 processes with 19 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -185,7 +185,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P2_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -194,15 +194,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -211,21 +211,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s -Wrote files for 46 helas calls in 0.247 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s +Wrote files for 46 helas calls in 0.242 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.331 s +ALOHA: aloha creates 5 routines in 0.324 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -233,7 +233,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.314 s +ALOHA: aloha creates 10 routines in 0.308 s VVV1 VVV1 FFV1 @@ -263,6 +263,7 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f @@ -283,9 +284,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.331s -user 0m2.084s -sys 0m0.240s +real 0m5.282s +user 0m2.049s +sys 0m0.227s ************************************************************ * * * W E L C O M E to * @@ -298,7 +299,7 @@ sys 0m0.240s * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -331,7 +332,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat index cdb64729b1..d0845f65f5 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.1_lo_vect 2023-08-08 * +#* VERSION 3.5.2_lo_vect 2023-11-08 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gg_tt01g.mad/MGMEVersion.txt b/epochX/cudacpp/gg_tt01g.mad/MGMEVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gg_tt01g.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gg_tt01g.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MGVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 02f655f48c..d2e7a3c91d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h index 0c2d2b0687..3ebd92c038 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index fe184caddf..d80d770784 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 5a3da931f2..9346ee4c6a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -216,7 +216,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f index daea73a6df..0c2ce6ec40 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -301,7 +301,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc index ce1badffca..1e24c2819d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. 
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h index 248ed1ec9e..3901ddcb20 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f index f751e9f14a..53ca75eaf4 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f index 6eb0fa0827..d6c6f42c9e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -216,7 +216,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f index 02f406668c..5c91f2448c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -317,7 +317,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/genps.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/genps.f index fe9c61504b..c00e33d954 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/genps.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/genps.f @@ -1877,12 +1877,12 @@ double precision function get_channel_cut(p, config) d1 = iforest(1, -i, config) d2 = iforest(2, -i, config) do j=0,3 - if (d1.gt.0.and.d1.le.2) then + if (d1.gt.0.and.d1.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d1) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d1) endif - if (d2.gt.0.and.d2.le.2) then + if (d2.gt.0.and.d2.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d2) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d2) diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/__init__.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/__init__.py index 16e60d8182..0d17042f0d 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/__init__.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/__init__.py @@ -26,6 +26,7 @@ class aMCatNLOError(MadGraph5Error): import os import logging import time +pjoin = os.path.join #Look for basic file position MG5DIR and MG4DIR MG5DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py index ef1bf58979..3995ce8109 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py @@ -2704,7 +2704,8 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - + elif issubclass(finput, RunCard): + target_class = finput else: return None @@ -2968,11 +2969,12 @@ def write(self, output_file, template=None, python_template=False, if python_template and not to_write: import string if self.blocks: - text = string.Template(text) mapping = {} for b in self.blocks: mapping[b.name] = b.get_template(self) - text = text.substitute(mapping) + if "$%s" % b.name not in text: + text += "\n$%s\n" % b.name + text = string.Template(text).substitute(mapping) if not self.list_parameter: text = text % self @@ -4875,6 +4877,9 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): continue break + if proc_characteristic['ninitial'] == 1: + self['SDE_strategy'] =1 + if 'MLM' in proc_characteristic['limitations']: if 
self['dynamical_scale_choice'] == -1: self['dynamical_scale_choice'] = 3 @@ -5940,7 +5945,7 @@ def default_setup(self): self.add_param("CheckCycle", 3) self.add_param("MaxAttempts", 10) self.add_param("ZeroThres", 1e-9) - self.add_param("OSThres", 1.0e-13) + self.add_param("OSThres", 1.0e-8) self.add_param("DoubleCheckHelicityFilter", True) self.add_param("WriteOutFilters", True) self.add_param("UseLoopFilter", False) diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py index 14c7f310dc..87cb4b88df 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/common_run_interface.py @@ -4906,6 +4906,7 @@ def __init__(self, question, cards=[], from_banner=None, banner=None, mode='auto self.load_default() self.define_paths(**opt) self.last_editline_pos = 0 + self.update_dependent_done = False if 'allow_arg' not in opt or not opt['allow_arg']: # add some mininal content for this: @@ -6585,7 +6586,9 @@ def postcmd(self, stop, line): self.check_card_consistency() if self.param_consistency: try: - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) + self.update_dependent_done = False except MadGraph5Error as error: if 'Missing block:' in str(error): self.fail_due_to_format +=1 @@ -6638,6 +6641,8 @@ def do_update(self, line, timer=0): self.update_dependent(self.mother_interface, self.me_dir, self.param_card, self.paths['param'], timer, run_card=self.run_card, lhapdfconfig=self.lhapdf) + self.update_dependent_done = True + elif args[0] == 'missing': self.update_missing() @@ -6717,12 +6722,13 @@ class TimeOutError(Exception): def handle_alarm(signum, frame): raise TimeOutError signal.signal(signal.SIGALRM, handle_alarm) + if timer: - signal.alarm(timer) log_level=30 else: log_level=20 + if run_card: as_for_pdf = {'cteq6_m': 0.118, 'cteq6_d': 0.118, @@ -6781,6 +6787,10 @@ def handle_alarm(signum, frame): logger.log(log_level, "update the strong coupling value (alpha_s) to the value from the pdf selected: %s", as_for_pdf[pdlabel]) modify = True + if timer: + signal.alarm(timer) + + # Try to load the model in the limited amount of time allowed try: model = mecmd.get_model() @@ -6909,7 +6919,8 @@ def check_block(self, blockname): def check_answer_consistency(self): """function called if the code reads a file""" self.check_card_consistency() - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) def help_set(self): '''help message for set''' diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py index a88d60b282..5fd170d18d 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py @@ -157,12 +157,16 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - try: + if stdout: nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - except Exception as error: - misc.sprint(stdout, 'no channel or error for %s' % Pdir) - continue - + else: + for matrix_file in misc.glob('matrix*orig.f', Pdir): + files.cp(matrix_file, matrix_file.replace('orig','optim')) + P_zero_result.append(Pdir) + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) + continue # bypass 
bad process + self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') @@ -183,11 +187,13 @@ def get_helicity(self, to_submit=True, clean=True): #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts (stdout, _) = p.communicate(" ".encode()) stdout = stdout.decode('ascii',errors='ignore') - if os.path.exists(pjoin(self.me_dir,'error')): + if os.path.exists(pjoin(self.me_dir, 'error')): raise Exception(pjoin(self.me_dir,'error')) # note a continue is not enough here, we have in top to link # the matrixX_optim.f to matrixX_orig.f to let the code to work # after this error. + # for matrix_file in misc.glob('matrix*orig.f', Pdir): + # files.cp(matrix_file, matrix_file.replace('orig','optim')) if 'no events passed cuts' in stdout: raise Exception diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py index c9d1c7706a..0b849330ef 100644 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py @@ -57,7 +57,7 @@ def reset_simd(self, old_value, new_value, name): return Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - + def plugin_input(self, finput): return @@ -79,7 +79,7 @@ def check_validity(self): self['sde_strategy'] = 1 if self['hel_recycling']: self['hel_recycling'] = False - + class GPURunCard(CPPRunCard): def default_setup(self): super(CPPRunCard, self).default_setup() diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py index d722702891..853aabc98a 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py @@ -1,4 +1,4 @@ -################################################################################ +############################################################################### # # Copyright (c) 2011 The MadGraph5_aMC@NLO Development team and Contributors # @@ -3675,7 +3675,7 @@ def do_refine(self, line): devnull.close() ############################################################################ - def do_combine_iteration(self, line): + def do_comine_iteration(self, line): """Not in help: Combine a given iteration combine_iteration Pdir Gdir S|R step S is for survey R is for refine @@ -3703,7 +3703,7 @@ def do_combine_iteration(self, line): ############################################################################ def do_combine_events(self, line): """Advanced commands: Launch combine events""" - + start=time.time() args = self.split_arg(line) start = time.time() # Check argument's validity @@ -3798,9 +3798,7 @@ def do_combine_events(self, line): self.correct_bias() elif self.run_card['custom_fcts']: self.correct_bias() - - logger.info("combine events done in %s", time.time()-start) - + logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') @@ -7368,7 +7366,7 @@ def wait_monitoring(Idle, Running, Done): import optparse # Get the directory of the script real path (bin) # and add it to the current PYTHONPATH - root_path = os.path.dirname(os.path.dirname(os.path.realpath( __file__ ))) + #root_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath( __file__ )))) 
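Note on the banner.py hunk above (RunCard.write): instead of substituting the template directly, the patched code first appends a "$<blockname>" placeholder for any block missing from the template text, then performs a single string.Template substitution. A minimal standalone illustration of that behaviour follows; the block names and contents are invented for the example.

    from string import Template

    # Illustration only: mimic the append-missing-placeholder logic from banner.py.
    text = "run settings\n$cuts\n"
    mapping = {'cuts': 'ptj = 20', 'heavymass': 'mt = 172.5'}  # 'heavymass' has no placeholder yet

    for name in mapping:
        if "$%s" % name not in text:
            text += "\n$%s\n" % name      # add the missing placeholder before substituting

    text = Template(text).substitute(mapping)
    print(text)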
sys.path.insert(0, root_path) class MyOptParser(optparse.OptionParser): @@ -7411,7 +7409,13 @@ def error(self, msg=''): import logging.config # Set logging level according to the logging level given by options #logging.basicConfig(level=vars(logging)[options.logging]) + import internal import internal.coloring_logging + # internal.file = XXX/bin/internal/__init__.py + # => need three dirname to get XXX + # we use internal to have any issue with pythonpath finding the wrong file + me_dir = os.path.dirname(os.path.dirname(os.path.dirname(internal.__file__))) + print("me_dir is", me_dir) try: if __debug__ and options.logging == 'INFO': options.logging = 'DEBUG' @@ -7419,7 +7423,8 @@ def error(self, msg=''): level = int(options.logging) else: level = eval('logging.' + options.logging) - logging.config.fileConfig(os.path.join(root_path, 'internal', 'me5_logging.conf')) + log_path = os.path.join(me_dir, 'bin', 'internal', 'me5_logging.conf') + logging.config.fileConfig(log_path) logging.root.setLevel(level) logging.getLogger('madgraph').setLevel(level) except: @@ -7433,9 +7438,9 @@ def error(self, msg=''): if '--web' in args: i = args.index('--web') args.pop(i) - cmd_line = MadEventCmd(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmd(me_dir, force_run=True) else: - cmd_line = MadEventCmdShell(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmdShell(me_dir, force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print(parser_error) diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/misc.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/misc.py index d3fed3baa2..91cd3e5c22 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/misc.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/misc.py @@ -347,7 +347,7 @@ def tell(msg): if dependency=='ninja': if cmd.options['ninja'] in ['None',None,''] or\ (cmd.options['ninja'] == './HEPTools/lib' and not MG5dir is None and\ - which_lib(pjoin(MG5dir,cmd.options['ninja'],'libninja.a')) is None): + which_lib(pjoin(MG5dir,cmd.options['ninja'],'lib','libninja.a')) is None): tell("Installing ninja...") cmd.do_install('ninja') diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/shower_card.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/shower_card.py index c6d3948cc4..c344ea1b15 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/shower_card.py @@ -45,7 +45,9 @@ class ShowerCard(dict): false = ['.false.', 'f', 'false', '0'] logical_vars = ['ue_enabled', 'hadronize', 'b_stable', 'pi_stable', 'wp_stable', 'wm_stable', 'z_stable', 'h_stable', 'tap_stable', 'tam_stable', - 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td'] + 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td', + 'space_shower_me_corrections', 'time_shower_me_corrections', + 'time_shower_me_extended', 'time_shower_me_after_first'] string_vars = ['extralibs', 'extrapaths', 'includepaths', 'analyse'] for i in range(1,100): string_vars.append('dm_'+str(i)) @@ -82,7 +84,11 @@ class ShowerCard(dict): 'b_mass' : {'HERWIG6':'b_mass', 'PYTHIA6': 'b_mass', 'HERWIGPP': 'b_mass', 'PYTHIA8': 'b_mass'}, 'analyse' : {'HERWIG6':'hwuti', 'PYTHIA6':'pyuti', 'HERWIGPP':'hwpputi', 'PYTHIA8':'py8uti'}, 'qcut' : {'PYTHIA8':'qcut'}, - 'njmax' : {'PYTHIA8':'njmax'}} + 'njmax' : {'PYTHIA8':'njmax'}, + 'space_shower_me_corrections' : {'PYTHIA8':'space_shower_me_corrections'}, + 'time_shower_me_corrections' : {'PYTHIA8':'time_shower_me_corrections'}, + 'time_shower_me_extended' : 
{'PYTHIA8':'time_shower_me_extended'}, + 'time_shower_me_after_first' : {'PYTHIA8':'time_shower_me_after_first'}} stdhep_dict = {'HERWIG6':'mcatnlo_hwan_stdhep.o', 'PYTHIA6':'mcatnlo_pyan_stdhep.o'} diff --git a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h index 8995b15c82..361b488401 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt01g.mad/src/Parameters_sm.cc b/epochX/cudacpp/gg_tt01g.mad/src/Parameters_sm.cc index 9d09eb6b62..64fc3fea62 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/gg_tt01g.mad/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt01g.mad/src/Parameters_sm.h b/epochX/cudacpp/gg_tt01g.mad/src/Parameters_sm.h index 6b32c66b9b..b6568d3761 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index 5643c4439c..68afa8d9b0 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005489349365234375  +DEBUG: model prefixing takes 0.005378007888793945  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.022 s +1 processes with 16 diagrams generated in 0.021 s Total: 1 processes with 16 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -184,21 +184,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s -Wrote files for 36 helas calls in 0.151 s +Wrote files for 36 helas calls in 0.148 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.330 s +ALOHA: aloha creates 5 routines in 0.323 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.314 s +ALOHA: aloha creates 10 routines in 0.309 s VVV1 VVV1 FFV1 @@ -236,6 +236,7 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 391 (offset 6 lines). 
patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f @@ -252,9 +253,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.275s -user 0m1.969s -sys 0m0.229s +real 0m5.147s +user 0m1.924s +sys 0m0.225s ************************************************************ * * * W E L C O M E to * @@ -267,7 +268,7 @@ sys 0m0.229s * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -300,7 +301,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat index 3af4991f01..a0ffbbc219 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.1_lo_vect 2023-08-08 * +#* VERSION 3.5.2_lo_vect 2023-11-08 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gg_ttg.mad/MGMEVersion.txt b/epochX/cudacpp/gg_ttg.mad/MGMEVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gg_ttg.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gg_ttg.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MGVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index f7f5899260..5e2bf0d19a 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h index 9f559fe3ae..37d6ebe981 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. 
Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f index d528b1d2f0..dd4cd3a0c2 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index 110e204c24..e28575ead8 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -216,7 +216,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f index bf665ff6e0..a885b7fde3 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -317,7 +317,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/genps.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/genps.f index fe9c61504b..c00e33d954 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/genps.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/genps.f @@ -1877,12 +1877,12 @@ double precision function get_channel_cut(p, config) d1 = iforest(1, -i, config) d2 = iforest(2, -i, config) do j=0,3 - if (d1.gt.0.and.d1.le.2) then + if (d1.gt.0.and.d1.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d1) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d1) endif - if (d2.gt.0.and.d2.le.2) then + if (d2.gt.0.and.d2.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d2) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d2) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/__init__.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/__init__.py index 16e60d8182..0d17042f0d 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/__init__.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/__init__.py @@ -26,6 +26,7 @@ class aMCatNLOError(MadGraph5Error): import os import logging import time +pjoin = os.path.join #Look for basic file position MG5DIR and MG4DIR MG5DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py index ef1bf58979..3995ce8109 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/banner.py @@ -2704,7 +2704,8 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - + elif issubclass(finput, RunCard): + target_class = finput else: return None @@ -2968,11 +2969,12 @@ def write(self, output_file, template=None, python_template=False, if python_template and not to_write: import string if self.blocks: - text = string.Template(text) mapping = {} for b in self.blocks: mapping[b.name] = b.get_template(self) - text = text.substitute(mapping) + if "$%s" % b.name not in text: + text += "\n$%s\n" % b.name + text = string.Template(text).substitute(mapping) if not self.list_parameter: text = text % self @@ -4875,6 +4877,9 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): continue break + if proc_characteristic['ninitial'] == 1: + self['SDE_strategy'] =1 + if 'MLM' in proc_characteristic['limitations']: if self['dynamical_scale_choice'] == -1: self['dynamical_scale_choice'] = 3 @@ -5940,7 +5945,7 @@ def default_setup(self): self.add_param("CheckCycle", 3) self.add_param("MaxAttempts", 10) self.add_param("ZeroThres", 1e-9) - self.add_param("OSThres", 1.0e-13) + self.add_param("OSThres", 1.0e-8) self.add_param("DoubleCheckHelicityFilter", True) self.add_param("WriteOutFilters", True) self.add_param("UseLoopFilter", False) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py index 14c7f310dc..87cb4b88df 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py @@ -4906,6 +4906,7 @@ def __init__(self, question, cards=[], from_banner=None, banner=None, mode='auto self.load_default() self.define_paths(**opt) self.last_editline_pos = 0 + self.update_dependent_done = False if 'allow_arg' not in opt or not opt['allow_arg']: # add some mininal content for this: @@ 
-6585,7 +6586,9 @@ def postcmd(self, stop, line): self.check_card_consistency() if self.param_consistency: try: - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) + self.update_dependent_done = False except MadGraph5Error as error: if 'Missing block:' in str(error): self.fail_due_to_format +=1 @@ -6638,6 +6641,8 @@ def do_update(self, line, timer=0): self.update_dependent(self.mother_interface, self.me_dir, self.param_card, self.paths['param'], timer, run_card=self.run_card, lhapdfconfig=self.lhapdf) + self.update_dependent_done = True + elif args[0] == 'missing': self.update_missing() @@ -6717,12 +6722,13 @@ class TimeOutError(Exception): def handle_alarm(signum, frame): raise TimeOutError signal.signal(signal.SIGALRM, handle_alarm) + if timer: - signal.alarm(timer) log_level=30 else: log_level=20 + if run_card: as_for_pdf = {'cteq6_m': 0.118, 'cteq6_d': 0.118, @@ -6781,6 +6787,10 @@ def handle_alarm(signum, frame): logger.log(log_level, "update the strong coupling value (alpha_s) to the value from the pdf selected: %s", as_for_pdf[pdlabel]) modify = True + if timer: + signal.alarm(timer) + + # Try to load the model in the limited amount of time allowed try: model = mecmd.get_model() @@ -6909,7 +6919,8 @@ def check_block(self, blockname): def check_answer_consistency(self): """function called if the code reads a file""" self.check_card_consistency() - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) def help_set(self): '''help message for set''' diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py index a88d60b282..5fd170d18d 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py @@ -157,12 +157,16 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - try: + if stdout: nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - except Exception as error: - misc.sprint(stdout, 'no channel or error for %s' % Pdir) - continue - + else: + for matrix_file in misc.glob('matrix*orig.f', Pdir): + files.cp(matrix_file, matrix_file.replace('orig','optim')) + P_zero_result.append(Pdir) + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) + continue # bypass bad process + self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') @@ -183,11 +187,13 @@ def get_helicity(self, to_submit=True, clean=True): #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts (stdout, _) = p.communicate(" ".encode()) stdout = stdout.decode('ascii',errors='ignore') - if os.path.exists(pjoin(self.me_dir,'error')): + if os.path.exists(pjoin(self.me_dir, 'error')): raise Exception(pjoin(self.me_dir,'error')) # note a continue is not enough here, we have in top to link # the matrixX_optim.f to matrixX_orig.f to let the code to work # after this error. 
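Note (illustration only, not applied by this patch): the common_run_interface.py hunks above introduce an update_dependent_done flag so that the expensive do_update('dependent') pass runs at most once per card-editing session instead of on every postcmd and again in check_answer_consistency. A minimal sketch of that run-once pattern, with hypothetical class and method names:

    class CardEditor:
        # illustrative stand-in for the patched question/answer loop
        def __init__(self):
            self.update_dependent_done = False

        def do_update_dependent(self):
            # placeholder for the expensive dependent-parameter recomputation
            self.update_dependent_done = True

        def postcmd(self):
            # same guard as the patched postcmd: skip the update if it already ran,
            # then clear the flag so the next command re-checks
            if not self.update_dependent_done:
                self.do_update_dependent()
            self.update_dependent_done = False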
+ # for matrix_file in misc.glob('matrix*orig.f', Pdir): + # files.cp(matrix_file, matrix_file.replace('orig','optim')) if 'no events passed cuts' in stdout: raise Exception diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py index c9d1c7706a..0b849330ef 100644 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py @@ -57,7 +57,7 @@ def reset_simd(self, old_value, new_value, name): return Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - + def plugin_input(self, finput): return @@ -79,7 +79,7 @@ def check_validity(self): self['sde_strategy'] = 1 if self['hel_recycling']: self['hel_recycling'] = False - + class GPURunCard(CPPRunCard): def default_setup(self): super(CPPRunCard, self).default_setup() diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py index d722702891..853aabc98a 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py @@ -1,4 +1,4 @@ -################################################################################ +############################################################################### # # Copyright (c) 2011 The MadGraph5_aMC@NLO Development team and Contributors # @@ -3675,7 +3675,7 @@ def do_refine(self, line): devnull.close() ############################################################################ - def do_combine_iteration(self, line): + def do_comine_iteration(self, line): """Not in help: Combine a given iteration combine_iteration Pdir Gdir S|R step S is for survey R is for refine @@ -3703,7 +3703,7 @@ def do_combine_iteration(self, line): ############################################################################ def do_combine_events(self, line): """Advanced commands: Launch combine events""" - + start=time.time() args = self.split_arg(line) start = time.time() # Check argument's validity @@ -3798,9 +3798,7 @@ def do_combine_events(self, line): self.correct_bias() elif self.run_card['custom_fcts']: self.correct_bias() - - logger.info("combine events done in %s", time.time()-start) - + logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') @@ -7368,7 +7366,7 @@ def wait_monitoring(Idle, Running, Done): import optparse # Get the directory of the script real path (bin) # and add it to the current PYTHONPATH - root_path = os.path.dirname(os.path.dirname(os.path.realpath( __file__ ))) + #root_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath( __file__ )))) sys.path.insert(0, root_path) class MyOptParser(optparse.OptionParser): @@ -7411,7 +7409,13 @@ def error(self, msg=''): import logging.config # Set logging level according to the logging level given by options #logging.basicConfig(level=vars(logging)[options.logging]) + import internal import internal.coloring_logging + # internal.file = XXX/bin/internal/__init__.py + # => need three dirname to get XXX + # we use internal to have any issue with pythonpath finding the wrong file + me_dir = os.path.dirname(os.path.dirname(os.path.dirname(internal.__file__))) + print("me_dir is", me_dir) try: if __debug__ and options.logging == 'INFO': options.logging = 'DEBUG' @@ -7419,7 +7423,8 @@ def error(self, msg=''): 
level = int(options.logging) else: level = eval('logging.' + options.logging) - logging.config.fileConfig(os.path.join(root_path, 'internal', 'me5_logging.conf')) + log_path = os.path.join(me_dir, 'bin', 'internal', 'me5_logging.conf') + logging.config.fileConfig(log_path) logging.root.setLevel(level) logging.getLogger('madgraph').setLevel(level) except: @@ -7433,9 +7438,9 @@ def error(self, msg=''): if '--web' in args: i = args.index('--web') args.pop(i) - cmd_line = MadEventCmd(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmd(me_dir, force_run=True) else: - cmd_line = MadEventCmdShell(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmdShell(me_dir, force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print(parser_error) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/misc.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/misc.py index d3fed3baa2..91cd3e5c22 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/misc.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/misc.py @@ -347,7 +347,7 @@ def tell(msg): if dependency=='ninja': if cmd.options['ninja'] in ['None',None,''] or\ (cmd.options['ninja'] == './HEPTools/lib' and not MG5dir is None and\ - which_lib(pjoin(MG5dir,cmd.options['ninja'],'libninja.a')) is None): + which_lib(pjoin(MG5dir,cmd.options['ninja'],'lib','libninja.a')) is None): tell("Installing ninja...") cmd.do_install('ninja') diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/shower_card.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/shower_card.py index c6d3948cc4..c344ea1b15 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/shower_card.py @@ -45,7 +45,9 @@ class ShowerCard(dict): false = ['.false.', 'f', 'false', '0'] logical_vars = ['ue_enabled', 'hadronize', 'b_stable', 'pi_stable', 'wp_stable', 'wm_stable', 'z_stable', 'h_stable', 'tap_stable', 'tam_stable', - 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td'] + 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td', + 'space_shower_me_corrections', 'time_shower_me_corrections', + 'time_shower_me_extended', 'time_shower_me_after_first'] string_vars = ['extralibs', 'extrapaths', 'includepaths', 'analyse'] for i in range(1,100): string_vars.append('dm_'+str(i)) @@ -82,7 +84,11 @@ class ShowerCard(dict): 'b_mass' : {'HERWIG6':'b_mass', 'PYTHIA6': 'b_mass', 'HERWIGPP': 'b_mass', 'PYTHIA8': 'b_mass'}, 'analyse' : {'HERWIG6':'hwuti', 'PYTHIA6':'pyuti', 'HERWIGPP':'hwpputi', 'PYTHIA8':'py8uti'}, 'qcut' : {'PYTHIA8':'qcut'}, - 'njmax' : {'PYTHIA8':'njmax'}} + 'njmax' : {'PYTHIA8':'njmax'}, + 'space_shower_me_corrections' : {'PYTHIA8':'space_shower_me_corrections'}, + 'time_shower_me_corrections' : {'PYTHIA8':'time_shower_me_corrections'}, + 'time_shower_me_extended' : {'PYTHIA8':'time_shower_me_extended'}, + 'time_shower_me_after_first' : {'PYTHIA8':'time_shower_me_after_first'}} stdhep_dict = {'HERWIG6':'mcatnlo_hwan_stdhep.o', 'PYTHIA6':'mcatnlo_pyan_stdhep.o'} diff --git a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h index 8995b15c82..361b488401 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 
3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.mad/src/Parameters_sm.cc b/epochX/cudacpp/gg_ttg.mad/src/Parameters_sm.cc index 9d09eb6b62..64fc3fea62 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/gg_ttg.mad/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.mad/src/Parameters_sm.h b/epochX/cudacpp/gg_ttg.mad/src/Parameters_sm.h index 6b32c66b9b..b6568d3761 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/gg_ttg.mad/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index 6c3bb7fa30..97056958fe 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00565791130065918  +DEBUG: model prefixing takes 0.005817890167236328  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
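Note (illustration only, not applied by this patch): the genps.f hunk near the top of this file set replaces the hard-coded upper bound 2 with nincoming in get_channel_cut, so incoming legs are identified correctly also for 1->N decay processes. A rough Python transcription of the per-daughter rule, with placeholder data structures:

    def fold_daughter(ptemp, i, d, nincoming):
        # illustrative only: ptemp[(j, k)] holds component j of the momentum of leg k;
        # negative k labels the internal propagator being accumulated
        for j in range(4):
            if 0 < d <= nincoming:
                ptemp[(j, -i)] -= ptemp[(j, d)]   # incoming leg: subtract its momentum
            else:
                ptemp[(j, -i)] += ptemp[(j, d)]   # outgoing leg or propagator: add it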
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.331 s +ALOHA: aloha creates 5 routines in 0.326 s VVV1 VVV1 FFV1 @@ -204,6 +204,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.836s -user 0m0.731s -sys 0m0.060s +real 0m3.779s +user 0m0.713s +sys 0m0.062s diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc index 9393033e26..7f5e51681d 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.h b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.h index 9f559fe3ae..37d6ebe981 100644 --- a/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h index 8995b15c82..361b488401 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.sa/src/Parameters_sm.cc b/epochX/cudacpp/gg_ttg.sa/src/Parameters_sm.cc index 9d09eb6b62..64fc3fea62 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/Parameters_sm.cc +++ b/epochX/cudacpp/gg_ttg.sa/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.sa/src/Parameters_sm.h b/epochX/cudacpp/gg_ttg.sa/src/Parameters_sm.h index 6b32c66b9b..b6568d3761 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/Parameters_sm.h +++ b/epochX/cudacpp/gg_ttg.sa/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 2401636ea2..eacd7a356a 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005415916442871094  +DEBUG: model prefixing takes 0.0053293704986572266  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.160 s +1 processes with 123 diagrams generated in 0.157 s Total: 1 processes with 123 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 INFO: Creating files in directory P1_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -184,21 +184,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -Generated helas calls for 1 subprocesses (123 diagrams) in 0.446 s -Wrote files for 222 helas calls in 0.728 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.425 s +Wrote files for 222 helas calls in 0.691 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.337 s +ALOHA: aloha creates 5 routines in 0.333 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -206,7 +206,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.317 s +ALOHA: aloha creates 10 
routines in 0.316 s VVV1 VVV1 FFV1 @@ -239,6 +239,7 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f @@ -255,9 +256,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m3.354s -user 0m3.128s -sys 0m0.221s +real 0m6.262s +user 0m3.028s +sys 0m0.232s ************************************************************ * * * W E L C O M E to * @@ -270,7 +271,7 @@ sys 0m0.221s * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -303,7 +304,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat index e4d3fe550f..b7568d1a73 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.1_lo_vect 2023-08-08 * +#* VERSION 3.5.2_lo_vect 2023-11-08 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gg_ttgg.mad/MGMEVersion.txt b/epochX/cudacpp/gg_ttgg.mad/MGMEVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gg_ttgg.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gg_ttgg.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MGVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc index 896d64343e..57dd4aed47 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h index d681eb7504..04f7c62976 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f index 9d747e6dc1..adf0afbe05 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f index 043887bde3..e4e527260c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -216,7 +216,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f index df931e07c4..272c6bd97d 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -349,7 +349,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/genps.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/genps.f index fe9c61504b..c00e33d954 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/genps.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/genps.f @@ -1877,12 +1877,12 @@ double precision function get_channel_cut(p, config) d1 = iforest(1, -i, config) d2 = iforest(2, -i, config) do j=0,3 - if (d1.gt.0.and.d1.le.2) then + if (d1.gt.0.and.d1.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d1) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d1) endif - if (d2.gt.0.and.d2.le.2) then + if (d2.gt.0.and.d2.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d2) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d2) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/__init__.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/__init__.py index 16e60d8182..0d17042f0d 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/__init__.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/__init__.py @@ -26,6 +26,7 @@ class aMCatNLOError(MadGraph5Error): import os import logging import time +pjoin = os.path.join #Look for basic file position MG5DIR and MG4DIR MG5DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py index ef1bf58979..3995ce8109 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/banner.py @@ -2704,7 +2704,8 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - + elif issubclass(finput, RunCard): + target_class = finput else: return None @@ -2968,11 +2969,12 @@ def write(self, output_file, template=None, python_template=False, if python_template and not to_write: import string if self.blocks: - text = string.Template(text) mapping = {} for b in self.blocks: mapping[b.name] = b.get_template(self) - text = text.substitute(mapping) + if "$%s" % b.name not in text: + text += "\n$%s\n" % b.name + text = string.Template(text).substitute(mapping) if not self.list_parameter: text = text % self @@ -4875,6 +4877,9 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): continue break + if proc_characteristic['ninitial'] == 1: + self['SDE_strategy'] =1 + if 'MLM' in proc_characteristic['limitations']: if self['dynamical_scale_choice'] == -1: self['dynamical_scale_choice'] = 3 @@ -5940,7 +5945,7 @@ def default_setup(self): self.add_param("CheckCycle", 3) self.add_param("MaxAttempts", 10) self.add_param("ZeroThres", 1e-9) - self.add_param("OSThres", 1.0e-13) + self.add_param("OSThres", 1.0e-8) self.add_param("DoubleCheckHelicityFilter", True) self.add_param("WriteOutFilters", True) self.add_param("UseLoopFilter", False) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py index 14c7f310dc..87cb4b88df 100755 --- 
a/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py @@ -4906,6 +4906,7 @@ def __init__(self, question, cards=[], from_banner=None, banner=None, mode='auto self.load_default() self.define_paths(**opt) self.last_editline_pos = 0 + self.update_dependent_done = False if 'allow_arg' not in opt or not opt['allow_arg']: # add some mininal content for this: @@ -6585,7 +6586,9 @@ def postcmd(self, stop, line): self.check_card_consistency() if self.param_consistency: try: - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) + self.update_dependent_done = False except MadGraph5Error as error: if 'Missing block:' in str(error): self.fail_due_to_format +=1 @@ -6638,6 +6641,8 @@ def do_update(self, line, timer=0): self.update_dependent(self.mother_interface, self.me_dir, self.param_card, self.paths['param'], timer, run_card=self.run_card, lhapdfconfig=self.lhapdf) + self.update_dependent_done = True + elif args[0] == 'missing': self.update_missing() @@ -6717,12 +6722,13 @@ class TimeOutError(Exception): def handle_alarm(signum, frame): raise TimeOutError signal.signal(signal.SIGALRM, handle_alarm) + if timer: - signal.alarm(timer) log_level=30 else: log_level=20 + if run_card: as_for_pdf = {'cteq6_m': 0.118, 'cteq6_d': 0.118, @@ -6781,6 +6787,10 @@ def handle_alarm(signum, frame): logger.log(log_level, "update the strong coupling value (alpha_s) to the value from the pdf selected: %s", as_for_pdf[pdlabel]) modify = True + if timer: + signal.alarm(timer) + + # Try to load the model in the limited amount of time allowed try: model = mecmd.get_model() @@ -6909,7 +6919,8 @@ def check_block(self, blockname): def check_answer_consistency(self): """function called if the code reads a file""" self.check_card_consistency() - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) def help_set(self): '''help message for set''' diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py index a88d60b282..5fd170d18d 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py @@ -157,12 +157,16 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - try: + if stdout: nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - except Exception as error: - misc.sprint(stdout, 'no channel or error for %s' % Pdir) - continue - + else: + for matrix_file in misc.glob('matrix*orig.f', Pdir): + files.cp(matrix_file, matrix_file.replace('orig','optim')) + P_zero_result.append(Pdir) + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) + continue # bypass bad process + self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') @@ -183,11 +187,13 @@ def get_helicity(self, to_submit=True, clean=True): #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts (stdout, _) = p.communicate(" ".encode()) stdout = stdout.decode('ascii',errors='ignore') - if os.path.exists(pjoin(self.me_dir,'error')): + if os.path.exists(pjoin(self.me_dir, 'error')): raise Exception(pjoin(self.me_dir,'error')) # note a continue is not enough here, we have in top to 
link # the matrixX_optim.f to matrixX_orig.f to let the code to work # after this error. + # for matrix_file in misc.glob('matrix*orig.f', Pdir): + # files.cp(matrix_file, matrix_file.replace('orig','optim')) if 'no events passed cuts' in stdout: raise Exception diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py index c9d1c7706a..0b849330ef 100644 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py @@ -57,7 +57,7 @@ def reset_simd(self, old_value, new_value, name): return Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - + def plugin_input(self, finput): return @@ -79,7 +79,7 @@ def check_validity(self): self['sde_strategy'] = 1 if self['hel_recycling']: self['hel_recycling'] = False - + class GPURunCard(CPPRunCard): def default_setup(self): super(CPPRunCard, self).default_setup() diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py index d722702891..853aabc98a 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py @@ -1,4 +1,4 @@ -################################################################################ +############################################################################### # # Copyright (c) 2011 The MadGraph5_aMC@NLO Development team and Contributors # @@ -3675,7 +3675,7 @@ def do_refine(self, line): devnull.close() ############################################################################ - def do_combine_iteration(self, line): + def do_comine_iteration(self, line): """Not in help: Combine a given iteration combine_iteration Pdir Gdir S|R step S is for survey R is for refine @@ -3703,7 +3703,7 @@ def do_combine_iteration(self, line): ############################################################################ def do_combine_events(self, line): """Advanced commands: Launch combine events""" - + start=time.time() args = self.split_arg(line) start = time.time() # Check argument's validity @@ -3798,9 +3798,7 @@ def do_combine_events(self, line): self.correct_bias() elif self.run_card['custom_fcts']: self.correct_bias() - - logger.info("combine events done in %s", time.time()-start) - + logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') @@ -7368,7 +7366,7 @@ def wait_monitoring(Idle, Running, Done): import optparse # Get the directory of the script real path (bin) # and add it to the current PYTHONPATH - root_path = os.path.dirname(os.path.dirname(os.path.realpath( __file__ ))) + #root_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath( __file__ )))) sys.path.insert(0, root_path) class MyOptParser(optparse.OptionParser): @@ -7411,7 +7409,13 @@ def error(self, msg=''): import logging.config # Set logging level according to the logging level given by options #logging.basicConfig(level=vars(logging)[options.logging]) + import internal import internal.coloring_logging + # internal.file = XXX/bin/internal/__init__.py + # => need three dirname to get XXX + # we use internal to have any issue with pythonpath finding the wrong file + me_dir = os.path.dirname(os.path.dirname(os.path.dirname(internal.__file__))) + print("me_dir is", me_dir) try: if __debug__ and 
options.logging == 'INFO': options.logging = 'DEBUG' @@ -7419,7 +7423,8 @@ def error(self, msg=''): level = int(options.logging) else: level = eval('logging.' + options.logging) - logging.config.fileConfig(os.path.join(root_path, 'internal', 'me5_logging.conf')) + log_path = os.path.join(me_dir, 'bin', 'internal', 'me5_logging.conf') + logging.config.fileConfig(log_path) logging.root.setLevel(level) logging.getLogger('madgraph').setLevel(level) except: @@ -7433,9 +7438,9 @@ def error(self, msg=''): if '--web' in args: i = args.index('--web') args.pop(i) - cmd_line = MadEventCmd(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmd(me_dir, force_run=True) else: - cmd_line = MadEventCmdShell(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmdShell(me_dir, force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print(parser_error) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/misc.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/misc.py index d3fed3baa2..91cd3e5c22 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/misc.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/misc.py @@ -347,7 +347,7 @@ def tell(msg): if dependency=='ninja': if cmd.options['ninja'] in ['None',None,''] or\ (cmd.options['ninja'] == './HEPTools/lib' and not MG5dir is None and\ - which_lib(pjoin(MG5dir,cmd.options['ninja'],'libninja.a')) is None): + which_lib(pjoin(MG5dir,cmd.options['ninja'],'lib','libninja.a')) is None): tell("Installing ninja...") cmd.do_install('ninja') diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/shower_card.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/shower_card.py index c6d3948cc4..c344ea1b15 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/shower_card.py @@ -45,7 +45,9 @@ class ShowerCard(dict): false = ['.false.', 'f', 'false', '0'] logical_vars = ['ue_enabled', 'hadronize', 'b_stable', 'pi_stable', 'wp_stable', 'wm_stable', 'z_stable', 'h_stable', 'tap_stable', 'tam_stable', - 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td'] + 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td', + 'space_shower_me_corrections', 'time_shower_me_corrections', + 'time_shower_me_extended', 'time_shower_me_after_first'] string_vars = ['extralibs', 'extrapaths', 'includepaths', 'analyse'] for i in range(1,100): string_vars.append('dm_'+str(i)) @@ -82,7 +84,11 @@ class ShowerCard(dict): 'b_mass' : {'HERWIG6':'b_mass', 'PYTHIA6': 'b_mass', 'HERWIGPP': 'b_mass', 'PYTHIA8': 'b_mass'}, 'analyse' : {'HERWIG6':'hwuti', 'PYTHIA6':'pyuti', 'HERWIGPP':'hwpputi', 'PYTHIA8':'py8uti'}, 'qcut' : {'PYTHIA8':'qcut'}, - 'njmax' : {'PYTHIA8':'njmax'}} + 'njmax' : {'PYTHIA8':'njmax'}, + 'space_shower_me_corrections' : {'PYTHIA8':'space_shower_me_corrections'}, + 'time_shower_me_corrections' : {'PYTHIA8':'time_shower_me_corrections'}, + 'time_shower_me_extended' : {'PYTHIA8':'time_shower_me_extended'}, + 'time_shower_me_after_first' : {'PYTHIA8':'time_shower_me_after_first'}} stdhep_dict = {'HERWIG6':'mcatnlo_hwan_stdhep.o', 'PYTHIA6':'mcatnlo_pyan_stdhep.o'} diff --git a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h index 9b946c21e1..8df465ad6d 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. 
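Note (illustration only, not applied by this patch): the madevent_interface.py __main__ hunks above stop deriving paths from root_path and instead locate the process directory from bin/internal/__init__.py (three dirname calls), then load bin/internal/me5_logging.conf relative to it. The path arithmetic, sketched with a hypothetical install location:

    import os

    internal_init = "/work/PROC_example/bin/internal/__init__.py"  # hypothetical internal.__file__
    # __init__.py -> internal -> bin -> process directory
    me_dir = os.path.dirname(os.path.dirname(os.path.dirname(internal_init)))
    log_path = os.path.join(me_dir, "bin", "internal", "me5_logging.conf")
    assert me_dir == "/work/PROC_example"
    assert log_path == "/work/PROC_example/bin/internal/me5_logging.conf"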
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.mad/src/Parameters_sm.cc b/epochX/cudacpp/gg_ttgg.mad/src/Parameters_sm.cc index 9d09eb6b62..64fc3fea62 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/gg_ttgg.mad/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.mad/src/Parameters_sm.h b/epochX/cudacpp/gg_ttgg.mad/src/Parameters_sm.h index 6b32c66b9b..b6568d3761 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index d29fe4c726..80631c94bf 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005518913269042969  +DEBUG: model prefixing takes 0.00567317008972168  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.160 s +1 processes with 123 diagrams generated in 0.157 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.430 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.423 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.324 s +ALOHA: aloha creates 5 routines in 0.318 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m1.541s -user 0m1.392s -sys 0m0.062s +real 0m4.435s +user 0m1.373s +sys 0m0.056s diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc index 927a19a802..204439a1dc 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.h b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.h index d681eb7504..04f7c62976 100644 --- a/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h index 9b946c21e1..8df465ad6d 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.sa/src/Parameters_sm.cc b/epochX/cudacpp/gg_ttgg.sa/src/Parameters_sm.cc index 9d09eb6b62..64fc3fea62 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/Parameters_sm.cc +++ b/epochX/cudacpp/gg_ttgg.sa/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.sa/src/Parameters_sm.h b/epochX/cudacpp/gg_ttgg.sa/src/Parameters_sm.h index 6b32c66b9b..b6568d3761 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/Parameters_sm.h +++ b/epochX/cudacpp/gg_ttgg.sa/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index cd9806264d..ab3974344c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005460023880004883  +DEBUG: model prefixing takes 0.005319833755493164  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.902 s +1 processes with 1240 diagrams generated in 1.855 s Total: 1 processes with 1240 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -177,7 +177,7 @@ INFO: Creating files in directory P1_gg_ttxggg INFO: Computing Color-Flow optimization [15120 term] INFO: Color-Flow passed to 1630 term in 8s. Introduce 3030 contraction DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -186,21 +186,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 
482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, 
subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.662 s -Wrote files for 2281 helas calls in 18.810 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.574 s +Wrote files for 2281 helas calls in 18.431 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.319 s +ALOHA: aloha creates 5 routines in 0.335 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -208,7 +208,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.314 s +ALOHA: aloha creates 10 routines in 0.313 s VVV1 VVV1 FFV1 @@ -241,6 +241,7 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f @@ -257,9 +258,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m29.634s -user 0m29.131s -sys 0m0.396s +real 0m32.103s +user 0m28.586s +sys 0m0.412s ************************************************************ * * * W E L C O M E to * @@ -272,7 +273,7 @@ sys 0m0.396s * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -305,7 +306,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat index 05d11d495d..2f92ecc4ba 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.1_lo_vect 2023-08-08 * +#* VERSION 3.5.2_lo_vect 2023-11-08 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. 
* #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gg_ttggg.mad/MGMEVersion.txt b/epochX/cudacpp/gg_ttggg.mad/MGMEVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gg_ttggg.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gg_ttggg.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MGVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc index a525c4ba3f..59033d7b2f 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h index dc41720ca6..2565923dde 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f index 2d3c5725be..d2a61fa2ac 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f index 51b8d47520..f22dfbf5e6 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -216,7 +216,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f index b8a6a894de..41dbc97183 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -413,7 +413,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -17540,7 +17540,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) TMP_JAMP(2914) = TMP_JAMP(2351) + TMP_JAMP(1665) ! used 2 times TMP_JAMP(2913) = TMP_JAMP(2310) + TMP_JAMP(2134) ! used 2 times TMP_JAMP(2912) = TMP_JAMP(2073) + ((-0.000000000000000D+00 - $ ,1.000000000000000D+00)) * AMP(1483) ! used 2 times + $ ,1.000000000000000D+00)) * AMP(1481) ! used 2 times TMP_JAMP(3030) = TMP_JAMP(2935) + ((0.000000000000000D+00, $ -1.000000000000000D+00)) * TMP_JAMP(1044) ! used 2 times TMP_JAMP(3029) = TMP_JAMP(2934) - TMP_JAMP(329) ! 
used 2 times @@ -17688,7 +17688,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +TMP_JAMP(360)+TMP_JAMP(485)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(558)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(576)+((0.000000000000000D+00 - $ ,1.000000000000000D+00))*AMP(1489)+(-1.000000000000000D+00) + $ ,1.000000000000000D+00))*AMP(1485)+(-1.000000000000000D+00) $ *TMP_JAMP(2911)+(-1.000000000000000D+00)*TMP_JAMP(2916)+( $ -1.000000000000000D+00)*TMP_JAMP(2971)+TMP_JAMP(2994) JAMP(2,1) = (-1.000000000000000D+00)*AMP(242)+( @@ -17698,7 +17698,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(557)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(576)+((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(1580)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(1480)+TMP_JAMP(2655)+(-1.000000000000000D+00) + $ *AMP(1476)+TMP_JAMP(2655)+(-1.000000000000000D+00) $ *TMP_JAMP(2913)+(-1.000000000000000D+00)*TMP_JAMP(2940) JAMP(3,1) = (-1.000000000000000D+00)*AMP(250)+( $ -1.000000000000000D+00)*TMP_JAMP(484)+((0.000000000000000D+00 @@ -17715,7 +17715,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -1.000000000000000D+00))*TMP_JAMP(575)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(1589)+TMP_JAMP(1693) $ +TMP_JAMP(2050)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *AMP(1471)+(-1.000000000000000D+00)*TMP_JAMP(2353) + $ *AMP(1467)+(-1.000000000000000D+00)*TMP_JAMP(2353) $ +TMP_JAMP(2659)+TMP_JAMP(2905)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(2955)+TMP_JAMP(2960) JAMP(5,1) = (-1.000000000000000D+00)*AMP(241) @@ -17919,7 +17919,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) JAMP(25,1) = (-1.000000000000000D+00)*TMP_JAMP(360) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(454) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(517) - $ +(-1.000000000000000D+00)*AMP(976)+(-1.000000000000000D+00) + $ +(-1.000000000000000D+00)*AMP(974)+(-1.000000000000000D+00) $ *TMP_JAMP(1843)+TMP_JAMP(1859)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(2085)+TMP_JAMP(2104)+( $ -1.000000000000000D+00)*TMP_JAMP(2662)+TMP_JAMP(2851) @@ -17929,7 +17929,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -1.000000000000000D+00))*TMP_JAMP(518)+(-1.000000000000000D+00) $ *TMP_JAMP(834)+(-1.000000000000000D+00)*TMP_JAMP(1019) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1374) - $ +(-1.000000000000000D+00)*AMP(967)+(-1.000000000000000D+00) + $ +(-1.000000000000000D+00)*AMP(965)+(-1.000000000000000D+00) $ *TMP_JAMP(1479)+TMP_JAMP(1842)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(2085)+(-1.000000000000000D+00) $ *TMP_JAMP(2129)+(-1.000000000000000D+00)*TMP_JAMP(2648) @@ -17940,7 +17940,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(513)+(-1.000000000000000D+00)*TMP_JAMP(809)+( $ -1.000000000000000D+00)*TMP_JAMP(1028)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(1373)+(-1.000000000000000D+00) - $ *AMP(975)+(-1.000000000000000D+00)*TMP_JAMP(1963)+TMP_JAMP(2060) + $ *AMP(973)+(-1.000000000000000D+00)*TMP_JAMP(1963)+TMP_JAMP(2060) $ +(-1.000000000000000D+00)*TMP_JAMP(2104)+TMP_JAMP(2317) $ +TMP_JAMP(2387)+TMP_JAMP(2567)+(-1.000000000000000D+00) $ *TMP_JAMP(2604)+TMP_JAMP(2796)+TMP_JAMP(2811)+( @@ -17950,7 +17950,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(470)+((0.000000000000000D+00,1.000000000000000D+00)) $ 
*TMP_JAMP(514)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(735)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1392)+(-1.000000000000000D+00)*AMP(958)+TMP_JAMP(1448) + $ *TMP_JAMP(1392)+(-1.000000000000000D+00)*AMP(956)+TMP_JAMP(1448) $ +(-1.000000000000000D+00)*TMP_JAMP(1839)+((0.000000000000000D $ +00,1.000000000000000D+00))*TMP_JAMP(1846)+(-1.000000000000000D $ +00)*TMP_JAMP(1919)+TMP_JAMP(1963)+(-1.000000000000000D+00) @@ -17960,13 +17960,13 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) JAMP(29,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(314)+((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(462)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(717)+(-1.000000000000000D+00)*AMP(966)+TMP_JAMP(1709) + $ *TMP_JAMP(717)+(-1.000000000000000D+00)*AMP(964)+TMP_JAMP(1709) $ +(-1.000000000000000D+00)*TMP_JAMP(1874)+TMP_JAMP(2061) - $ +TMP_JAMP(2129)+AMP(1642)+TMP_JAMP(2445)+(-1.000000000000000D + $ +TMP_JAMP(2129)+AMP(1638)+TMP_JAMP(2445)+(-1.000000000000000D $ +00)*TMP_JAMP(2493)+TMP_JAMP(2647)+TMP_JAMP(2985)+TMP_JAMP(2996) JAMP(30,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(320)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(520)+(-1.000000000000000D+00)*AMP(957)+( + $ *TMP_JAMP(520)+(-1.000000000000000D+00)*AMP(955)+( $ -1.000000000000000D+00)*TMP_JAMP(1840)+TMP_JAMP(1874) $ +TMP_JAMP(1919)+TMP_JAMP(1966)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(2064)+TMP_JAMP(2250)+( @@ -17974,7 +17974,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +TMP_JAMP(3000)+TMP_JAMP(3007) JAMP(31,1) = TMP_JAMP(804)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(1391)+(-1.000000000000000D+00) - $ *AMP(979)+TMP_JAMP(1857)+TMP_JAMP(1894)+TMP_JAMP(2130) + $ *AMP(977)+TMP_JAMP(1857)+TMP_JAMP(1894)+TMP_JAMP(2130) $ +TMP_JAMP(2609)+(-1.000000000000000D+00)*TMP_JAMP(2816) $ +TMP_JAMP(2825)+(-1.000000000000000D+00)*TMP_JAMP(2863)+( $ -1.000000000000000D+00)*TMP_JAMP(3018) @@ -17982,7 +17982,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(949)+((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(1147)+TMP_JAMP(1280)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(1374)+(-1.000000000000000D+00) - $ *AMP(970)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *AMP(968)+((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(2067)+(-1.000000000000000D+00)*TMP_JAMP(2130) $ +TMP_JAMP(2333)+(-1.000000000000000D+00)*TMP_JAMP(2542) $ +TMP_JAMP(2713)+(-1.000000000000000D+00)*TMP_JAMP(2763) @@ -17991,7 +17991,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) JAMP(33,1) = (-1.000000000000000D+00)*TMP_JAMP(1102)+( $ -1.000000000000000D+00)*TMP_JAMP(1256)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(1391)+(-1.000000000000000D+00) - $ *AMP(977)+(-1.000000000000000D+00)*TMP_JAMP(1688)+( + $ *AMP(975)+(-1.000000000000000D+00)*TMP_JAMP(1688)+( $ -1.000000000000000D+00)*TMP_JAMP(2556)+TMP_JAMP(2811) $ +TMP_JAMP(2817)+TMP_JAMP(2882)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(2976)+(-1.000000000000000D+00) @@ -18009,7 +18009,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(1033)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1152)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1155)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1224)+(-1.000000000000000D+00)*AMP(968)+TMP_JAMP(1582) + $ 
*TMP_JAMP(1224)+(-1.000000000000000D+00)*AMP(966)+TMP_JAMP(1582) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2006) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2105) $ +TMP_JAMP(2514)+TMP_JAMP(2546)+TMP_JAMP(2695)+( @@ -18029,7 +18029,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -1.000000000000000D+00))*TMP_JAMP(910)+TMP_JAMP(1277) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1346) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1373) - $ +(-1.000000000000000D+00)*AMP(980)+TMP_JAMP(1883) + $ +(-1.000000000000000D+00)*AMP(978)+TMP_JAMP(1883) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2066) $ +TMP_JAMP(2128)+TMP_JAMP(2609)+(-1.000000000000000D+00) $ *TMP_JAMP(2846)+(-1.000000000000000D+00)*TMP_JAMP(2899)+( @@ -18040,7 +18040,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -1.000000000000000D+00))*TMP_JAMP(1143)+((0.000000000000000D+00 $ ,-1.000000000000000D+00))*TMP_JAMP(1148)+((0.000000000000000D $ +00,-1.000000000000000D+00))*TMP_JAMP(1392)+( - $ -1.000000000000000D+00)*AMP(961)+(-1.000000000000000D+00) + $ -1.000000000000000D+00)*AMP(959)+(-1.000000000000000D+00) $ *TMP_JAMP(2128)+((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(2138)+TMP_JAMP(2296)+(-1.000000000000000D+00) $ *TMP_JAMP(2483)+(-1.000000000000000D+00)*TMP_JAMP(2535)+( @@ -18050,7 +18050,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -1.000000000000000D+00)*TMP_JAMP(1020)+(-1.000000000000000D+00) $ *TMP_JAMP(1039)+TMP_JAMP(1100)+(-1.000000000000000D+00) $ *TMP_JAMP(1255)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1346)+(-1.000000000000000D+00)*AMP(978)+TMP_JAMP(1686) + $ *TMP_JAMP(1346)+(-1.000000000000000D+00)*AMP(976)+TMP_JAMP(1686) $ +(-1.000000000000000D+00)*TMP_JAMP(1799)+((0.000000000000000D $ +00,1.000000000000000D+00))*TMP_JAMP(1988)+(-1.000000000000000D $ +00)*TMP_JAMP(2497)+TMP_JAMP(2591)+(-1.000000000000000D+00) @@ -18072,7 +18072,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,1.000000000000000D+00))*TMP_JAMP(1159)+((0.000000000000000D+00 $ ,-1.000000000000000D+00))*TMP_JAMP(1211)+(-1.000000000000000D $ +00)*TMP_JAMP(1270)+((0.000000000000000D+00,-1.000000000000000D - $ +00))*TMP_JAMP(1311)+(-1.000000000000000D+00)*AMP(959) + $ +00))*TMP_JAMP(1311)+(-1.000000000000000D+00)*AMP(957) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1784) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1868) $ +(-1.000000000000000D+00)*TMP_JAMP(1939)+((0.000000000000000D @@ -18094,11 +18094,11 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) JAMP(43,1) = TMP_JAMP(678)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(688)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(949)+TMP_JAMP(1387)+( - $ -1.000000000000000D+00)*AMP(971)+TMP_JAMP(2125)+TMP_JAMP(2127) + $ -1.000000000000000D+00)*AMP(969)+TMP_JAMP(2125)+TMP_JAMP(2127) $ +(-1.000000000000000D+00)*TMP_JAMP(2481)+TMP_JAMP(2497)+( $ -1.000000000000000D+00)*TMP_JAMP(2722)+(-1.000000000000000D+00) $ *TMP_JAMP(2897)+(-1.000000000000000D+00)*TMP_JAMP(2996) - JAMP(44,1) = TMP_JAMP(1384)+(-1.000000000000000D+00)*AMP(962)+( + JAMP(44,1) = TMP_JAMP(1384)+(-1.000000000000000D+00)*AMP(960)+( $ -1.000000000000000D+00)*TMP_JAMP(2126)+(-1.000000000000000D+00) $ *TMP_JAMP(2127)+(-1.000000000000000D+00)*TMP_JAMP(2535) $ +TMP_JAMP(2556)+(-1.000000000000000D+00)*TMP_JAMP(2730)+( @@ -18107,7 +18107,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, 
IVEC) JAMP(45,1) = ((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(728)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(874)+TMP_JAMP(1382)+(-1.000000000000000D+00) - $ *TMP_JAMP(1387)+(-1.000000000000000D+00)*AMP(969)+TMP_JAMP(1824) + $ *TMP_JAMP(1387)+(-1.000000000000000D+00)*AMP(967)+TMP_JAMP(1824) $ +(-1.000000000000000D+00)*TMP_JAMP(2088)+((0.000000000000000D $ +00,1.000000000000000D+00))*TMP_JAMP(2105)+(-1.000000000000000D $ +00)*TMP_JAMP(2327)+(-1.000000000000000D+00)*TMP_JAMP(2608) @@ -18127,7 +18127,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) JAMP(47,1) = TMP_JAMP(1129)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(1158)+((0.000000000000000D+00 $ ,-1.000000000000000D+00))*TMP_JAMP(1303)+(-1.000000000000000D - $ +00)*TMP_JAMP(1384)+(-1.000000000000000D+00)*AMP(960) + $ +00)*TMP_JAMP(1384)+(-1.000000000000000D+00)*AMP(958) $ +TMP_JAMP(1563)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(2086)+(-1.000000000000000D+00)*TMP_JAMP(2089)+( $ -1.000000000000000D+00)*TMP_JAMP(2364)+TMP_JAMP(2466)+( @@ -18146,21 +18146,21 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(1900)+TMP_JAMP(1972)+TMP_JAMP(2677)+( $ -1.000000000000000D+00)*TMP_JAMP(2897)+TMP_JAMP(2954) JAMP(49,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1393)+(-1.000000000000000D+00)*AMP(1405) + $ *TMP_JAMP(1393)+(-1.000000000000000D+00)*AMP(1403) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1746) $ +TMP_JAMP(1892)+(-1.000000000000000D+00)*TMP_JAMP(1939) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2136) $ +TMP_JAMP(2579)+TMP_JAMP(2630)+(-1.000000000000000D+00) $ *TMP_JAMP(2836)+TMP_JAMP(2837)+TMP_JAMP(2860)+TMP_JAMP(2990) JAMP(50,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1405)+(-1.000000000000000D+00)*AMP(1399)+( + $ *TMP_JAMP(1405)+(-1.000000000000000D+00)*AMP(1397)+( $ -1.000000000000000D+00)*TMP_JAMP(1892)+TMP_JAMP(1938) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1977) $ +TMP_JAMP(2026)+(-1.000000000000000D+00)*TMP_JAMP(2620) $ +TMP_JAMP(2731)+TMP_JAMP(2783)+TMP_JAMP(2938)+TMP_JAMP(2986) JAMP(51,1) = ((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1394)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1397)+(-1.000000000000000D+00)*AMP(1404) + $ *TMP_JAMP(1397)+(-1.000000000000000D+00)*AMP(1402) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1737) $ +TMP_JAMP(1891)+TMP_JAMP(1937)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(2136)+TMP_JAMP(2575) @@ -18168,11 +18168,11 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -1.000000000000000D+00)*TMP_JAMP(2895) JAMP(52,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(1176)+TMP_JAMP(1385)+(-1.000000000000000D+00) - $ *AMP(1020)+(-1.000000000000000D+00)*TMP_JAMP(1619)+( + $ *AMP(1018)+(-1.000000000000000D+00)*TMP_JAMP(1619)+( $ -1.000000000000000D+00)*TMP_JAMP(1891)+TMP_JAMP(2145)+( $ -1.000000000000000D+00)*TMP_JAMP(2531)+(-1.000000000000000D+00) $ *TMP_JAMP(2853)+TMP_JAMP(2938)+TMP_JAMP(2988)+TMP_JAMP(3009) - JAMP(53,1) = TMP_JAMP(1415)+(-1.000000000000000D+00)*AMP(1398) + JAMP(53,1) = TMP_JAMP(1415)+(-1.000000000000000D+00)*AMP(1396) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1744) $ +(-1.000000000000000D+00)*TMP_JAMP(1811)+TMP_JAMP(1890) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1977) @@ -18184,7 +18184,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ 
*TMP_JAMP(721)+(-1.000000000000000D+00)*TMP_JAMP(1263) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1295) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1375) - $ +(-1.000000000000000D+00)*AMP(1019)+(-1.000000000000000D+00) + $ +(-1.000000000000000D+00)*AMP(1017)+(-1.000000000000000D+00) $ *TMP_JAMP(1655)+(-1.000000000000000D+00)*TMP_JAMP(1890) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1986) $ +(-1.000000000000000D+00)*TMP_JAMP(2145)+TMP_JAMP(2492) @@ -18194,7 +18194,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) JAMP(55,1) = ((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1063)+TMP_JAMP(1141)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(1177)+(-1.000000000000000D+00) - $ *AMP(1408)+(-1.000000000000000D+00)*TMP_JAMP(1894)+( + $ *AMP(1406)+(-1.000000000000000D+00)*TMP_JAMP(1894)+( $ -1.000000000000000D+00)*TMP_JAMP(2075)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(2108)+(-1.000000000000000D+00) $ *TMP_JAMP(2578)+TMP_JAMP(2821)+(-1.000000000000000D+00) @@ -18203,7 +18203,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) JAMP(56,1) = TMP_JAMP(647)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(1168)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(1205)+(-1.000000000000000D+00) - $ *AMP(1402)+TMP_JAMP(2047)+((0.000000000000000D+00, + $ *AMP(1400)+TMP_JAMP(2047)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(2108)+(-1.000000000000000D+00) $ *TMP_JAMP(2452)+TMP_JAMP(2814)+(-1.000000000000000D+00) $ *TMP_JAMP(2940)+(-1.000000000000000D+00)*TMP_JAMP(2957)+( @@ -18213,7 +18213,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(1172)+TMP_JAMP(1257)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(1301)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(1340)+(-1.000000000000000D+00) - $ *AMP(1406)+TMP_JAMP(1677)+((0.000000000000000D+00 + $ *AMP(1404)+TMP_JAMP(1677)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(2142)+(-1.000000000000000D+00) $ *TMP_JAMP(2820)+TMP_JAMP(2832)+(-1.000000000000000D+00) $ *TMP_JAMP(2909)+((0.000000000000000D+00,-1.000000000000000D+00)) @@ -18233,7 +18233,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(893) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1169) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1209) - $ +TMP_JAMP(1377)+(-1.000000000000000D+00)*AMP(1400) + $ +TMP_JAMP(1377)+(-1.000000000000000D+00)*AMP(1398) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1776) $ +(-1.000000000000000D+00)*TMP_JAMP(2149)+TMP_JAMP(2729)+( $ -1.000000000000000D+00)*TMP_JAMP(2819)+(-1.000000000000000D+00) @@ -18251,7 +18251,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -1.000000000000000D+00)*TMP_JAMP(2879)+(-1.000000000000000D+00) $ *TMP_JAMP(2983) JAMP(61,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1394)+(-1.000000000000000D+00)*AMP(1409) + $ *TMP_JAMP(1394)+(-1.000000000000000D+00)*AMP(1407) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2106) $ +(-1.000000000000000D+00)*TMP_JAMP(2319)+(-1.000000000000000D $ +00)*TMP_JAMP(2805)+(-1.000000000000000D+00)*TMP_JAMP(2881) @@ -18261,14 +18261,14 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,1.000000000000000D+00))*TMP_JAMP(1231)+((0.000000000000000D+00 $ ,-1.000000000000000D+00))*TMP_JAMP(1288)+((0.000000000000000D $ 
+00,1.000000000000000D+00))*TMP_JAMP(1342)+(-1.000000000000000D - $ +00)*AMP(1022)+((0.000000000000000D+00,1.000000000000000D+00)) + $ +00)*AMP(1020)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(2106)+(-1.000000000000000D+00)*TMP_JAMP(2146)+( $ -1.000000000000000D+00)*TMP_JAMP(2271)+TMP_JAMP(2363) $ +TMP_JAMP(2437)+TMP_JAMP(2562)+(-1.000000000000000D+00) $ *TMP_JAMP(2745)+(-1.000000000000000D+00)*TMP_JAMP(2988)+( $ -1.000000000000000D+00)*TMP_JAMP(3022) JAMP(63,1) = (-1.000000000000000D+00)*TMP_JAMP(1380)+( - $ -1.000000000000000D+00)*AMP(1407)+TMP_JAMP(1952) + $ -1.000000000000000D+00)*AMP(1405)+TMP_JAMP(1952) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2142) $ +(-1.000000000000000D+00)*TMP_JAMP(2341)+TMP_JAMP(2452)+( $ -1.000000000000000D+00)*TMP_JAMP(2687)+(-1.000000000000000D+00) @@ -18278,7 +18278,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(311) $ +(-1.000000000000000D+00)*TMP_JAMP(421)+((0.000000000000000D+00 $ ,-1.000000000000000D+00))*TMP_JAMP(501)+TMP_JAMP(1380)+( - $ -1.000000000000000D+00)*AMP(947)+((0.000000000000000D+00, + $ -1.000000000000000D+00)*AMP(945)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(1544)+TMP_JAMP(1683) $ +TMP_JAMP(1801)+(-1.000000000000000D+00)*TMP_JAMP(2450) $ +TMP_JAMP(2586)+TMP_JAMP(2720)+TMP_JAMP(2869) @@ -18287,7 +18287,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) JAMP(65,1) = TMP_JAMP(579)+(-1.000000000000000D+00) $ *TMP_JAMP(1008)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1049)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1218)+(-1.000000000000000D+00)*AMP(1017) + $ *TMP_JAMP(1218)+(-1.000000000000000D+00)*AMP(1015) $ +TMP_JAMP(1611)+TMP_JAMP(1862)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(1901)+TMP_JAMP(2273)+( $ -1.000000000000000D+00)*TMP_JAMP(2441)+TMP_JAMP(3022) @@ -18304,7 +18304,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -1.000000000000000D+00)*TMP_JAMP(2584)+TMP_JAMP(2887)+( $ -1.000000000000000D+00)*TMP_JAMP(2914)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(2975) - JAMP(67,1) = (-1.000000000000000D+00)*AMP(1403)+( + JAMP(67,1) = (-1.000000000000000D+00)*AMP(1401)+( $ -1.000000000000000D+00)*TMP_JAMP(1626)+(-1.000000000000000D+00) $ *TMP_JAMP(2144)+(-1.000000000000000D+00)*TMP_JAMP(2452)+( $ -1.000000000000000D+00)*TMP_JAMP(2678)+TMP_JAMP(2768) @@ -18314,13 +18314,13 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(1055)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1058)+TMP_JAMP(1275)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(1342)+(-1.000000000000000D+00) - $ *AMP(1021)+((0.000000000000000D+00,-1.000000000000000D+00)) + $ *AMP(1019)+((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(2116)+TMP_JAMP(2144)+TMP_JAMP(2297)+( $ -1.000000000000000D+00)*TMP_JAMP(2341)+TMP_JAMP(2426)+( $ -1.000000000000000D+00)*TMP_JAMP(2486)+TMP_JAMP(2794)+( $ -1.000000000000000D+00)*TMP_JAMP(2999)+TMP_JAMP(3016) JAMP(69,1) = (-1.000000000000000D+00)*TMP_JAMP(1413)+( - $ -1.000000000000000D+00)*AMP(1401)+TMP_JAMP(2042)+TMP_JAMP(2149) + $ -1.000000000000000D+00)*AMP(1399)+TMP_JAMP(2042)+TMP_JAMP(2149) $ +TMP_JAMP(2578)+TMP_JAMP(2679)+TMP_JAMP(2731)+( $ -1.000000000000000D+00)*TMP_JAMP(2800)+(-1.000000000000000D+00) $ *TMP_JAMP(2883)+TMP_JAMP(3004) @@ -18337,7 +18337,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ 
-1.000000000000000D+00)*TMP_JAMP(2961) JAMP(71,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(1176)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(1296)+(-1.000000000000000D+00)*AMP(1018) + $ *TMP_JAMP(1296)+(-1.000000000000000D+00)*AMP(1016) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2091) $ +TMP_JAMP(2343)+(-1.000000000000000D+00)*TMP_JAMP(2800)+( $ -1.000000000000000D+00)*TMP_JAMP(2945)+(-1.000000000000000D+00) @@ -18359,11 +18359,11 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,1.000000000000000D+00))*TMP_JAMP(1761)+((0.000000000000000D+00 $ ,-1.000000000000000D+00))*TMP_JAMP(1764)+TMP_JAMP(1895)+( $ -1.000000000000000D+00)*TMP_JAMP(1932)+(-1.000000000000000D+00) - $ *AMP(1428)+TMP_JAMP(2569)+(-1.000000000000000D+00) + $ *AMP(1424)+TMP_JAMP(2569)+(-1.000000000000000D+00) $ *TMP_JAMP(2652)+TMP_JAMP(2683)+TMP_JAMP(2786)+TMP_JAMP(2796) $ +TMP_JAMP(2902) JAMP(74,1) = TMP_JAMP(2027)+TMP_JAMP(2042)+(-1.000000000000000D - $ +00)*AMP(1422)+TMP_JAMP(2383)+TMP_JAMP(2580)+( + $ +00)*AMP(1418)+TMP_JAMP(2383)+TMP_JAMP(2580)+( $ -1.000000000000000D+00)*TMP_JAMP(2683)+TMP_JAMP(2735)+( $ -1.000000000000000D+00)*TMP_JAMP(2798)+(-1.000000000000000D+00) $ *TMP_JAMP(2932)+TMP_JAMP(2942)+TMP_JAMP(3008) @@ -18372,14 +18372,14 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(1383)+(-1.000000000000000D+00)*TMP_JAMP(1386) $ +TMP_JAMP(1860)+(-1.000000000000000D+00)*TMP_JAMP(1863)+( $ -1.000000000000000D+00)*TMP_JAMP(1895)+TMP_JAMP(1899)+( - $ -1.000000000000000D+00)*AMP(1427)+TMP_JAMP(2627)+TMP_JAMP(2780) + $ -1.000000000000000D+00)*AMP(1423)+TMP_JAMP(2627)+TMP_JAMP(2780) $ +(-1.000000000000000D+00)*TMP_JAMP(2895)+(-1.000000000000000D $ +00)*TMP_JAMP(2936) JAMP(76,1) = (-1.000000000000000D+00)*TMP_JAMP(1038)+( $ -1.000000000000000D+00)*TMP_JAMP(1107)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(1185)+((0.000000000000000D+00 $ ,-1.000000000000000D+00))*TMP_JAMP(1203)+(-1.000000000000000D - $ +00)*AMP(1029)+(-1.000000000000000D+00)*TMP_JAMP(1899) + $ +00)*AMP(1027)+(-1.000000000000000D+00)*TMP_JAMP(1899) $ +TMP_JAMP(2043)+(-1.000000000000000D+00)*TMP_JAMP(2095)+( $ -1.000000000000000D+00)*TMP_JAMP(2328)+TMP_JAMP(2458)+( $ -1.000000000000000D+00)*TMP_JAMP(2611)+TMP_JAMP(2649)+( @@ -18388,13 +18388,13 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +TMP_JAMP(3009) JAMP(77,1) = (-1.000000000000000D+00)*TMP_JAMP(800) $ +TMP_JAMP(1631)+(-1.000000000000000D+00)*TMP_JAMP(1812) - $ +TMP_JAMP(1898)+(-1.000000000000000D+00)*AMP(1421)+( + $ +TMP_JAMP(1898)+(-1.000000000000000D+00)*AMP(1417)+( $ -1.000000000000000D+00)*TMP_JAMP(2332)+TMP_JAMP(2537) $ +TMP_JAMP(2932)+(-1.000000000000000D+00)*TMP_JAMP(2936)+( $ -1.000000000000000D+00)*TMP_JAMP(2972)+TMP_JAMP(3023) JAMP(78,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(1216)+(-1.000000000000000D+00)*TMP_JAMP(1264)+( - $ -1.000000000000000D+00)*AMP(1028)+(-1.000000000000000D+00) + $ -1.000000000000000D+00)*AMP(1026)+(-1.000000000000000D+00) $ *TMP_JAMP(1494)+(-1.000000000000000D+00)*TMP_JAMP(1633) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1764) $ +(-1.000000000000000D+00)*TMP_JAMP(1898)+TMP_JAMP(2095)+( @@ -18408,7 +18408,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(1200)+TMP_JAMP(1626)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(1849)+(-1.000000000000000D+00) $ *TMP_JAMP(1883)+(-1.000000000000000D+00)*TMP_JAMP(2036)+( - $ 
-1.000000000000000D+00)*AMP(1431)+TMP_JAMP(2489)+( + $ -1.000000000000000D+00)*AMP(1427)+TMP_JAMP(2489)+( $ -1.000000000000000D+00)*TMP_JAMP(2505)+(-1.000000000000000D+00) $ *TMP_JAMP(2570)+(-1.000000000000000D+00)*TMP_JAMP(2630) $ +TMP_JAMP(2645)+TMP_JAMP(2686)+(-1.000000000000000D+00) @@ -18417,7 +18417,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,1.000000000000000D+00))*TMP_JAMP(1207)+((0.000000000000000D+00 $ ,-1.000000000000000D+00))*TMP_JAMP(1291)+TMP_JAMP(2037) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2138) - $ +(-1.000000000000000D+00)*AMP(1425)+(-1.000000000000000D+00) + $ +(-1.000000000000000D+00)*AMP(1421)+(-1.000000000000000D+00) $ *TMP_JAMP(2250)+(-1.000000000000000D+00)*TMP_JAMP(2381)+( $ -1.000000000000000D+00)*TMP_JAMP(2686)+(-1.000000000000000D+00) $ *TMP_JAMP(2699)+TMP_JAMP(2905)+TMP_JAMP(2987)+( @@ -18428,7 +18428,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,1.000000000000000D+00))*TMP_JAMP(1349)+((0.000000000000000D+00 $ ,-1.000000000000000D+00))*TMP_JAMP(1987)+TMP_JAMP(2020)+( $ -1.000000000000000D+00)*TMP_JAMP(2141)+(-1.000000000000000D+00) - $ *AMP(1429)+(-1.000000000000000D+00)*TMP_JAMP(2773) + $ *AMP(1425)+(-1.000000000000000D+00)*TMP_JAMP(2773) $ +TMP_JAMP(2864)+(-1.000000000000000D+00)*TMP_JAMP(2909) $ +TMP_JAMP(3011) JAMP(82,1) = (-1.000000000000000D+00)*AMP(404) @@ -18448,7 +18448,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(1212)+TMP_JAMP(1268)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(1868)+((0.000000000000000D+00 $ ,-1.000000000000000D+00))*TMP_JAMP(2011)+(-1.000000000000000D - $ +00)*AMP(1423)+TMP_JAMP(2451)+TMP_JAMP(2699)+( + $ +00)*AMP(1419)+TMP_JAMP(2451)+TMP_JAMP(2699)+( $ -1.000000000000000D+00)*TMP_JAMP(2772)+TMP_JAMP(2917)+( $ -1.000000000000000D+00)*TMP_JAMP(2939)+(-1.000000000000000D+00) $ *TMP_JAMP(2965) @@ -18465,7 +18465,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -1.000000000000000D+00)*TMP_JAMP(2761)+(-1.000000000000000D+00) $ *TMP_JAMP(2880)+(-1.000000000000000D+00)*TMP_JAMP(2922) $ +TMP_JAMP(2965) - JAMP(85,1) = TMP_JAMP(1386)+(-1.000000000000000D+00)*AMP(1432)+( + JAMP(85,1) = TMP_JAMP(1386)+(-1.000000000000000D+00)*AMP(1428)+( $ -1.000000000000000D+00)*TMP_JAMP(2372)+TMP_JAMP(2387) $ +TMP_JAMP(2393)+TMP_JAMP(2427)+(-1.000000000000000D+00) $ *TMP_JAMP(2467)+(-1.000000000000000D+00)*TMP_JAMP(2505)+( @@ -18478,14 +18478,14 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(983)+TMP_JAMP(1107)+TMP_JAMP(1127) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1204) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1290) - $ +(-1.000000000000000D+00)*AMP(1031)+TMP_JAMP(2146)+( + $ +(-1.000000000000000D+00)*AMP(1029)+TMP_JAMP(2146)+( $ -1.000000000000000D+00)*TMP_JAMP(2480)+TMP_JAMP(2499)+( $ -1.000000000000000D+00)*TMP_JAMP(2721)+(-1.000000000000000D+00) $ *TMP_JAMP(2896)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(2977)+(-1.000000000000000D+00)*TMP_JAMP(2995) JAMP(87,1) = (-1.000000000000000D+00)*TMP_JAMP(1379)+( $ -1.000000000000000D+00)*TMP_JAMP(1953)+TMP_JAMP(2141)+( - $ -1.000000000000000D+00)*AMP(1430)+TMP_JAMP(2247)+TMP_JAMP(2403) + $ -1.000000000000000D+00)*AMP(1426)+TMP_JAMP(2247)+TMP_JAMP(2403) $ +TMP_JAMP(2882)+TMP_JAMP(2902)+(-1.000000000000000D+00) $ *TMP_JAMP(2929)+TMP_JAMP(3005) JAMP(88,1) = (-1.000000000000000D+00)*AMP(405)+( @@ -18504,7 +18504,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ 
,1.000000000000000D+00))*TMP_JAMP(476)+TMP_JAMP(1007) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1052) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1219) - $ +(-1.000000000000000D+00)*AMP(1026)+TMP_JAMP(1696)+( + $ +(-1.000000000000000D+00)*AMP(1024)+TMP_JAMP(1696)+( $ -1.000000000000000D+00)*TMP_JAMP(1722)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(1858)+((0.000000000000000D+00 $ ,-1.000000000000000D+00))*TMP_JAMP(1901)+(-1.000000000000000D @@ -18526,22 +18526,22 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(3003) JAMP(91,1) = TMP_JAMP(647)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(2113)+(-1.000000000000000D+00) - $ *AMP(1426)+TMP_JAMP(2369)+TMP_JAMP(2502)+(-1.000000000000000D + $ *AMP(1422)+TMP_JAMP(2369)+TMP_JAMP(2502)+(-1.000000000000000D $ +00)*TMP_JAMP(2941)+(-1.000000000000000D+00)*TMP_JAMP(3023)+( $ -1.000000000000000D+00)*TMP_JAMP(3024) JAMP(92,1) = ((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(985)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1204)+TMP_JAMP(1261)+TMP_JAMP(1280) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1350) - $ +(-1.000000000000000D+00)*AMP(1030)+((0.000000000000000D+00 + $ +(-1.000000000000000D+00)*AMP(1028)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(2113)+(-1.000000000000000D+00) $ *TMP_JAMP(2143)+TMP_JAMP(2334)+(-1.000000000000000D+00) $ *TMP_JAMP(2545)+TMP_JAMP(2714)+(-1.000000000000000D+00) $ *TMP_JAMP(2762)+TMP_JAMP(2857)+(-1.000000000000000D+00) $ *TMP_JAMP(3002) JAMP(93,1) = ((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(1769)+(-1.000000000000000D+00)*AMP(1424)+( - $ -1.000000000000000D+00)*AMP(1893)+TMP_JAMP(2465)+TMP_JAMP(2476) + $ *TMP_JAMP(1769)+(-1.000000000000000D+00)*AMP(1420)+( + $ -1.000000000000000D+00)*AMP(1889)+TMP_JAMP(2465)+TMP_JAMP(2476) $ +(-1.000000000000000D+00)*TMP_JAMP(2625)+(-1.000000000000000D $ +00)*TMP_JAMP(2917)+TMP_JAMP(2928)+(-1.000000000000000D+00) $ *TMP_JAMP(2931)+TMP_JAMP(2950)+TMP_JAMP(3024) @@ -18558,7 +18558,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +00))*TMP_JAMP(237)+(-1.000000000000000D+00)*TMP_JAMP(1043) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1250) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1350) - $ +(-1.000000000000000D+00)*AMP(1027)+TMP_JAMP(2135) + $ +(-1.000000000000000D+00)*AMP(1025)+TMP_JAMP(2135) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2148) $ +(-1.000000000000000D+00)*TMP_JAMP(2355)+(-1.000000000000000D $ +00)*TMP_JAMP(2381)+TMP_JAMP(2757)+TMP_JAMP(2779)+( @@ -18578,13 +18578,13 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(1399)+(-1.000000000000000D+00)*TMP_JAMP(1953)+( $ -1.000000000000000D+00)*TMP_JAMP(2025)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(2121)+(-1.000000000000000D+00) - $ *AMP(1449)+TMP_JAMP(2234)+TMP_JAMP(2634)+(-1.000000000000000D + $ *AMP(1445)+TMP_JAMP(2234)+TMP_JAMP(2634)+(-1.000000000000000D $ +00)*TMP_JAMP(2671)+TMP_JAMP(2689)+TMP_JAMP(2727)+TMP_JAMP(2866) $ +TMP_JAMP(3012) JAMP(98,1) = ((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1401)+TMP_JAMP(1952)+(-1.000000000000000D+00) $ *TMP_JAMP(2022)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2118)+(-1.000000000000000D+00)*AMP(1443) + $ *TMP_JAMP(2118)+(-1.000000000000000D+00)*AMP(1439) $ +TMP_JAMP(2390)+(-1.000000000000000D+00)*TMP_JAMP(2408) $ +TMP_JAMP(2456)+(-1.000000000000000D+00)*TMP_JAMP(2689) $ 
+TMP_JAMP(2841)+TMP_JAMP(2908)+(-1.000000000000000D+00) @@ -18593,13 +18593,13 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(1018)+TMP_JAMP(1376)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(1378)+TMP_JAMP(1913) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2121) - $ +TMP_JAMP(2124)+(-1.000000000000000D+00)*AMP(1448)+( + $ +TMP_JAMP(2124)+(-1.000000000000000D+00)*AMP(1444)+( $ -1.000000000000000D+00)*TMP_JAMP(2490)+(-1.000000000000000D+00) $ *TMP_JAMP(2638)+TMP_JAMP(2765)+(-1.000000000000000D+00) $ *TMP_JAMP(2843)+TMP_JAMP(2901) JAMP(100,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(746)+(-1.000000000000000D+00)*TMP_JAMP(1278)+( - $ -1.000000000000000D+00)*AMP(1038)+(-1.000000000000000D+00) + $ -1.000000000000000D+00)*AMP(1036)+(-1.000000000000000D+00) $ *TMP_JAMP(1913)+((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(2012)+((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(2119)+(-1.000000000000000D+00)*TMP_JAMP(2499) @@ -18608,13 +18608,13 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ -1.000000000000000D+00)*TMP_JAMP(2952)+TMP_JAMP(3020) JAMP(101,1) = TMP_JAMP(1910)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(2118)+TMP_JAMP(2124)+( - $ -1.000000000000000D+00)*AMP(1442)+AMP(1813)+TMP_JAMP(2342)+( + $ -1.000000000000000D+00)*AMP(1438)+AMP(1809)+TMP_JAMP(2342)+( $ -1.000000000000000D+00)*TMP_JAMP(2549)+(-1.000000000000000D+00) $ *TMP_JAMP(2842)+(-1.000000000000000D+00)*TMP_JAMP(2867) $ +TMP_JAMP(2984)+TMP_JAMP(3014) JAMP(102,1) = (-1.000000000000000D+00)*TMP_JAMP(1030) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1404) - $ +(-1.000000000000000D+00)*AMP(1037)+(-1.000000000000000D+00) + $ +(-1.000000000000000D+00)*AMP(1035)+(-1.000000000000000D+00) $ *TMP_JAMP(1809)+(-1.000000000000000D+00)*TMP_JAMP(1910) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2018) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2119) @@ -18624,7 +18624,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) JAMP(103,1) = ((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1252)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1388)+(-1.000000000000000D+00)*TMP_JAMP(2125)+( - $ -1.000000000000000D+00)*AMP(1452)+TMP_JAMP(2430)+( + $ -1.000000000000000D+00)*AMP(1448)+TMP_JAMP(2430)+( $ -1.000000000000000D+00)*TMP_JAMP(2447)+(-1.000000000000000D+00) $ *TMP_JAMP(2478)+(-1.000000000000000D+00)*TMP_JAMP(2633) $ +TMP_JAMP(2664)+(-1.000000000000000D+00)*TMP_JAMP(2848) @@ -18634,7 +18634,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ ,1.000000000000000D+00))*TMP_JAMP(845)+((0.000000000000000D+00 $ ,1.000000000000000D+00))*TMP_JAMP(962)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(1228)+TMP_JAMP(2126)+( - $ -1.000000000000000D+00)*AMP(1446)+(-1.000000000000000D+00) + $ -1.000000000000000D+00)*AMP(1442)+(-1.000000000000000D+00) $ *TMP_JAMP(2440)+(-1.000000000000000D+00)*TMP_JAMP(2457)+( $ -1.000000000000000D+00)*TMP_JAMP(2580)+TMP_JAMP(2739)+( $ -1.000000000000000D+00)*TMP_JAMP(2830)+(-1.000000000000000D+00) @@ -18644,7 +18644,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(989)+((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(1388)+(-1.000000000000000D+00)*TMP_JAMP(1670) $ +TMP_JAMP(2088)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2117)+(-1.000000000000000D+00)*AMP(1450) + $ *TMP_JAMP(2117)+(-1.000000000000000D+00)*AMP(1446) $ 
+TMP_JAMP(2901)+(-1.000000000000000D+00)*TMP_JAMP(2937)+( $ -1.000000000000000D+00)*TMP_JAMP(2944)+(-1.000000000000000D+00) $ *TMP_JAMP(3026) @@ -18666,7 +18666,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1304) $ +(-1.000000000000000D+00)*TMP_JAMP(1914)+TMP_JAMP(2089) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2137) - $ +(-1.000000000000000D+00)*AMP(1444)+TMP_JAMP(2576) + $ +(-1.000000000000000D+00)*AMP(1440)+TMP_JAMP(2576) $ +TMP_JAMP(2828)+(-1.000000000000000D+00)*TMP_JAMP(2939)+( $ -1.000000000000000D+00)*TMP_JAMP(3026) JAMP(108,1) = (-1.000000000000000D+00)*AMP(411) @@ -18674,7 +18674,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(301) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(334) $ +(-1.000000000000000D+00)*TMP_JAMP(437)+TMP_JAMP(440) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(596)+( + $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(594)+( $ -1.000000000000000D+00)*TMP_JAMP(781)+(-1.000000000000000D+00) $ *TMP_JAMP(817)+TMP_JAMP(846)+((0.000000000000000D+00, $ -1.000000000000000D+00))*TMP_JAMP(977)+((0.000000000000000D+00, @@ -18689,7 +18689,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(1378) $ +(-1.000000000000000D+00)*TMP_JAMP(1884)+(-1.000000000000000D $ +00)*TMP_JAMP(2039)+((0.000000000000000D+00,-1.000000000000000D - $ +00))*TMP_JAMP(2068)+(-1.000000000000000D+00)*AMP(1453)+( + $ +00))*TMP_JAMP(2068)+(-1.000000000000000D+00)*AMP(1449)+( $ -1.000000000000000D+00)*TMP_JAMP(2357)+TMP_JAMP(2523)+( $ -1.000000000000000D+00)*TMP_JAMP(2573)+TMP_JAMP(2678)+( $ -1.000000000000000D+00)*TMP_JAMP(2766)+TMP_JAMP(2775)+( @@ -18697,7 +18697,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) JAMP(110,1) = ((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(990)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1248)+TMP_JAMP(1277)+(-1.000000000000000D+00) - $ *AMP(1040)+((0.000000000000000D+00,1.000000000000000D+00)) + $ *AMP(1038)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(1852)+TMP_JAMP(1884)+TMP_JAMP(2040) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2116) $ +(-1.000000000000000D+00)*TMP_JAMP(2338)+(-1.000000000000000D @@ -18705,7 +18705,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +TMP_JAMP(3015)+(-1.000000000000000D+00)*TMP_JAMP(3020) JAMP(111,1) = TMP_JAMP(1516)+(-1.000000000000000D+00) $ *TMP_JAMP(1932)+((0.000000000000000D+00,1.000000000000000D+00)) - $ *TMP_JAMP(2117)+(-1.000000000000000D+00)*AMP(1451)+( + $ *TMP_JAMP(2117)+(-1.000000000000000D+00)*AMP(1447)+( $ -1.000000000000000D+00)*TMP_JAMP(2371)+TMP_JAMP(2519) $ +TMP_JAMP(2572)+(-1.000000000000000D+00)*TMP_JAMP(2679) $ +TMP_JAMP(2695)+TMP_JAMP(2787)+((0.000000000000000D+00 @@ -18724,7 +18724,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(78)+((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(321)+((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(739)+(-1.000000000000000D+00)*TMP_JAMP(1272)+( - $ -1.000000000000000D+00)*AMP(1035)+(-1.000000000000000D+00) + $ -1.000000000000000D+00)*AMP(1033)+(-1.000000000000000D+00) $ *TMP_JAMP(1810)+((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(2091)+TMP_JAMP(2803)+(-1.000000000000000D+00) $ *TMP_JAMP(2933)+TMP_JAMP(2991)+(-1.000000000000000D+00) @@ -18745,15 
+18745,15 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(2915)+(-1.000000000000000D+00)*TMP_JAMP(2991) JAMP(115,1) = ((0.000000000000000D+00,-1.000000000000000D+00)) $ *TMP_JAMP(589)+((0.000000000000000D+00,-1.000000000000000D+00)) - $ *TMP_JAMP(2122)+(-1.000000000000000D+00)*AMP(1447)+( + $ *TMP_JAMP(2122)+(-1.000000000000000D+00)*AMP(1443)+( $ -1.000000000000000D+00)*TMP_JAMP(2373)+TMP_JAMP(2550)+( $ -1.000000000000000D+00)*TMP_JAMP(2574)+(-1.000000000000000D+00) $ *TMP_JAMP(2582)+(-1.000000000000000D+00)*TMP_JAMP(2626) $ +TMP_JAMP(2629)+TMP_JAMP(2941)+(-1.000000000000000D+00) $ *TMP_JAMP(3014) - JAMP(116,1) = TMP_JAMP(1279)+(-1.000000000000000D+00)*AMP(1039) + JAMP(116,1) = TMP_JAMP(1279)+(-1.000000000000000D+00)*AMP(1037) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2122) - $ +TMP_JAMP(2143)+AMP(1669)+(-1.000000000000000D+00) + $ +TMP_JAMP(2143)+AMP(1665)+(-1.000000000000000D+00) $ *TMP_JAMP(2371)+(-1.000000000000000D+00)*TMP_JAMP(2619)+( $ -1.000000000000000D+00)*TMP_JAMP(2823)+TMP_JAMP(2853)+( $ -1.000000000000000D+00)*TMP_JAMP(2989)+(-1.000000000000000D+00) @@ -18761,7 +18761,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) JAMP(117,1) = ((0.000000000000000D+00,1.000000000000000D+00)) $ *TMP_JAMP(589)+(-1.000000000000000D+00)*TMP_JAMP(1658) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(2137) - $ +(-1.000000000000000D+00)*AMP(1445)+AMP(1519)+TMP_JAMP(2596) + $ +(-1.000000000000000D+00)*AMP(1441)+AMP(1515)+TMP_JAMP(2596) $ +TMP_JAMP(2624)+TMP_JAMP(2633)+TMP_JAMP(2884)+TMP_JAMP(2908)+( $ -1.000000000000000D+00)*TMP_JAMP(2928)+TMP_JAMP(2959) JAMP(118,1) = ((0.000000000000000D+00,1.000000000000000D+00)) @@ -18777,7 +18777,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ *TMP_JAMP(2858)+TMP_JAMP(2918)+(-1.000000000000000D+00) $ *TMP_JAMP(2959) JAMP(119,1) = (-1.000000000000000D+00)*TMP_JAMP(1041)+( - $ -1.000000000000000D+00)*AMP(1036)+TMP_JAMP(1608) + $ -1.000000000000000D+00)*AMP(1034)+TMP_JAMP(1608) $ +((0.000000000000000D+00,-1.000000000000000D+00))*TMP_JAMP(2148) $ +(-1.000000000000000D+00)*TMP_JAMP(2614)+TMP_JAMP(2635) $ +TMP_JAMP(2933)+TMP_JAMP(2992)+TMP_JAMP(3019) @@ -18790,7 +18790,7 @@ REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) $ +TMP_JAMP(531)+(-1.000000000000000D+00)*TMP_JAMP(1418)+( $ -1.000000000000000D+00)*TMP_JAMP(1673)+TMP_JAMP(1724) $ +((0.000000000000000D+00,1.000000000000000D+00))*TMP_JAMP(1797) - $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1462) + $ +((0.000000000000000D+00,-1.000000000000000D+00))*AMP(1458) $ +TMP_JAMP(2619)+(-1.000000000000000D+00)*TMP_JAMP(2634) $ +TMP_JAMP(2670)+(-1.000000000000000D+00)*TMP_JAMP(2916)+( $ -1.000000000000000D+00)*TMP_JAMP(2992) diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/genps.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/genps.f index fe9c61504b..c00e33d954 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/genps.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/genps.f @@ -1877,12 +1877,12 @@ double precision function get_channel_cut(p, config) d1 = iforest(1, -i, config) d2 = iforest(2, -i, config) do j=0,3 - if (d1.gt.0.and.d1.le.2) then + if (d1.gt.0.and.d1.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d1) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d1) endif - if (d2.gt.0.and.d2.le.2) then + if (d2.gt.0.and.d2.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d2) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d2) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/__init__.py 
b/epochX/cudacpp/gg_ttggg.mad/bin/internal/__init__.py index 16e60d8182..0d17042f0d 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/__init__.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/__init__.py @@ -26,6 +26,7 @@ class aMCatNLOError(MadGraph5Error): import os import logging import time +pjoin = os.path.join #Look for basic file position MG5DIR and MG4DIR MG5DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py index ef1bf58979..3995ce8109 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py @@ -2704,7 +2704,8 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - + elif issubclass(finput, RunCard): + target_class = finput else: return None @@ -2968,11 +2969,12 @@ def write(self, output_file, template=None, python_template=False, if python_template and not to_write: import string if self.blocks: - text = string.Template(text) mapping = {} for b in self.blocks: mapping[b.name] = b.get_template(self) - text = text.substitute(mapping) + if "$%s" % b.name not in text: + text += "\n$%s\n" % b.name + text = string.Template(text).substitute(mapping) if not self.list_parameter: text = text % self @@ -4875,6 +4877,9 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): continue break + if proc_characteristic['ninitial'] == 1: + self['SDE_strategy'] =1 + if 'MLM' in proc_characteristic['limitations']: if self['dynamical_scale_choice'] == -1: self['dynamical_scale_choice'] = 3 @@ -5940,7 +5945,7 @@ def default_setup(self): self.add_param("CheckCycle", 3) self.add_param("MaxAttempts", 10) self.add_param("ZeroThres", 1e-9) - self.add_param("OSThres", 1.0e-13) + self.add_param("OSThres", 1.0e-8) self.add_param("DoubleCheckHelicityFilter", True) self.add_param("WriteOutFilters", True) self.add_param("UseLoopFilter", False) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py index 14c7f310dc..87cb4b88df 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py @@ -4906,6 +4906,7 @@ def __init__(self, question, cards=[], from_banner=None, banner=None, mode='auto self.load_default() self.define_paths(**opt) self.last_editline_pos = 0 + self.update_dependent_done = False if 'allow_arg' not in opt or not opt['allow_arg']: # add some mininal content for this: @@ -6585,7 +6586,9 @@ def postcmd(self, stop, line): self.check_card_consistency() if self.param_consistency: try: - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) + self.update_dependent_done = False except MadGraph5Error as error: if 'Missing block:' in str(error): self.fail_due_to_format +=1 @@ -6638,6 +6641,8 @@ def do_update(self, line, timer=0): self.update_dependent(self.mother_interface, self.me_dir, self.param_card, self.paths['param'], timer, run_card=self.run_card, lhapdfconfig=self.lhapdf) + self.update_dependent_done = True + elif args[0] == 'missing': self.update_missing() @@ -6717,12 +6722,13 @@ class TimeOutError(Exception): def handle_alarm(signum, frame): raise TimeOutError signal.signal(signal.SIGALRM, handle_alarm) + if timer: - signal.alarm(timer) log_level=30 else: log_level=20 + if 
run_card: as_for_pdf = {'cteq6_m': 0.118, 'cteq6_d': 0.118, @@ -6781,6 +6787,10 @@ def handle_alarm(signum, frame): logger.log(log_level, "update the strong coupling value (alpha_s) to the value from the pdf selected: %s", as_for_pdf[pdlabel]) modify = True + if timer: + signal.alarm(timer) + + # Try to load the model in the limited amount of time allowed try: model = mecmd.get_model() @@ -6909,7 +6919,8 @@ def check_block(self, blockname): def check_answer_consistency(self): """function called if the code reads a file""" self.check_card_consistency() - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) def help_set(self): '''help message for set''' diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py index a88d60b282..5fd170d18d 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py @@ -157,12 +157,16 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - try: + if stdout: nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - except Exception as error: - misc.sprint(stdout, 'no channel or error for %s' % Pdir) - continue - + else: + for matrix_file in misc.glob('matrix*orig.f', Pdir): + files.cp(matrix_file, matrix_file.replace('orig','optim')) + P_zero_result.append(Pdir) + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) + continue # bypass bad process + self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') @@ -183,11 +187,13 @@ def get_helicity(self, to_submit=True, clean=True): #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts (stdout, _) = p.communicate(" ".encode()) stdout = stdout.decode('ascii',errors='ignore') - if os.path.exists(pjoin(self.me_dir,'error')): + if os.path.exists(pjoin(self.me_dir, 'error')): raise Exception(pjoin(self.me_dir,'error')) # note a continue is not enough here, we have in top to link # the matrixX_optim.f to matrixX_orig.f to let the code to work # after this error. 
+ # for matrix_file in misc.glob('matrix*orig.f', Pdir): + # files.cp(matrix_file, matrix_file.replace('orig','optim')) if 'no events passed cuts' in stdout: raise Exception diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py index c9d1c7706a..0b849330ef 100644 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py @@ -57,7 +57,7 @@ def reset_simd(self, old_value, new_value, name): return Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - + def plugin_input(self, finput): return @@ -79,7 +79,7 @@ def check_validity(self): self['sde_strategy'] = 1 if self['hel_recycling']: self['hel_recycling'] = False - + class GPURunCard(CPPRunCard): def default_setup(self): super(CPPRunCard, self).default_setup() diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py index d722702891..853aabc98a 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py @@ -1,4 +1,4 @@ -################################################################################ +############################################################################### # # Copyright (c) 2011 The MadGraph5_aMC@NLO Development team and Contributors # @@ -3675,7 +3675,7 @@ def do_refine(self, line): devnull.close() ############################################################################ - def do_combine_iteration(self, line): + def do_comine_iteration(self, line): """Not in help: Combine a given iteration combine_iteration Pdir Gdir S|R step S is for survey R is for refine @@ -3703,7 +3703,7 @@ def do_combine_iteration(self, line): ############################################################################ def do_combine_events(self, line): """Advanced commands: Launch combine events""" - + start=time.time() args = self.split_arg(line) start = time.time() # Check argument's validity @@ -3798,9 +3798,7 @@ def do_combine_events(self, line): self.correct_bias() elif self.run_card['custom_fcts']: self.correct_bias() - - logger.info("combine events done in %s", time.time()-start) - + logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') @@ -7368,7 +7366,7 @@ def wait_monitoring(Idle, Running, Done): import optparse # Get the directory of the script real path (bin) # and add it to the current PYTHONPATH - root_path = os.path.dirname(os.path.dirname(os.path.realpath( __file__ ))) + #root_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath( __file__ )))) sys.path.insert(0, root_path) class MyOptParser(optparse.OptionParser): @@ -7411,7 +7409,13 @@ def error(self, msg=''): import logging.config # Set logging level according to the logging level given by options #logging.basicConfig(level=vars(logging)[options.logging]) + import internal import internal.coloring_logging + # internal.file = XXX/bin/internal/__init__.py + # => need three dirname to get XXX + # we use internal to have any issue with pythonpath finding the wrong file + me_dir = os.path.dirname(os.path.dirname(os.path.dirname(internal.__file__))) + print("me_dir is", me_dir) try: if __debug__ and options.logging == 'INFO': options.logging = 'DEBUG' @@ -7419,7 +7423,8 @@ def 
error(self, msg=''): level = int(options.logging) else: level = eval('logging.' + options.logging) - logging.config.fileConfig(os.path.join(root_path, 'internal', 'me5_logging.conf')) + log_path = os.path.join(me_dir, 'bin', 'internal', 'me5_logging.conf') + logging.config.fileConfig(log_path) logging.root.setLevel(level) logging.getLogger('madgraph').setLevel(level) except: @@ -7433,9 +7438,9 @@ def error(self, msg=''): if '--web' in args: i = args.index('--web') args.pop(i) - cmd_line = MadEventCmd(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmd(me_dir, force_run=True) else: - cmd_line = MadEventCmdShell(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmdShell(me_dir, force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print(parser_error) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/misc.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/misc.py index d3fed3baa2..91cd3e5c22 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/misc.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/misc.py @@ -347,7 +347,7 @@ def tell(msg): if dependency=='ninja': if cmd.options['ninja'] in ['None',None,''] or\ (cmd.options['ninja'] == './HEPTools/lib' and not MG5dir is None and\ - which_lib(pjoin(MG5dir,cmd.options['ninja'],'libninja.a')) is None): + which_lib(pjoin(MG5dir,cmd.options['ninja'],'lib','libninja.a')) is None): tell("Installing ninja...") cmd.do_install('ninja') diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/shower_card.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/shower_card.py index c6d3948cc4..c344ea1b15 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/shower_card.py @@ -45,7 +45,9 @@ class ShowerCard(dict): false = ['.false.', 'f', 'false', '0'] logical_vars = ['ue_enabled', 'hadronize', 'b_stable', 'pi_stable', 'wp_stable', 'wm_stable', 'z_stable', 'h_stable', 'tap_stable', 'tam_stable', - 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td'] + 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td', + 'space_shower_me_corrections', 'time_shower_me_corrections', + 'time_shower_me_extended', 'time_shower_me_after_first'] string_vars = ['extralibs', 'extrapaths', 'includepaths', 'analyse'] for i in range(1,100): string_vars.append('dm_'+str(i)) @@ -82,7 +84,11 @@ class ShowerCard(dict): 'b_mass' : {'HERWIG6':'b_mass', 'PYTHIA6': 'b_mass', 'HERWIGPP': 'b_mass', 'PYTHIA8': 'b_mass'}, 'analyse' : {'HERWIG6':'hwuti', 'PYTHIA6':'pyuti', 'HERWIGPP':'hwpputi', 'PYTHIA8':'py8uti'}, 'qcut' : {'PYTHIA8':'qcut'}, - 'njmax' : {'PYTHIA8':'njmax'}} + 'njmax' : {'PYTHIA8':'njmax'}, + 'space_shower_me_corrections' : {'PYTHIA8':'space_shower_me_corrections'}, + 'time_shower_me_corrections' : {'PYTHIA8':'time_shower_me_corrections'}, + 'time_shower_me_extended' : {'PYTHIA8':'time_shower_me_extended'}, + 'time_shower_me_after_first' : {'PYTHIA8':'time_shower_me_after_first'}} stdhep_dict = {'HERWIG6':'mcatnlo_hwan_stdhep.o', 'PYTHIA6':'mcatnlo_pyan_stdhep.o'} diff --git a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h index 9b946c21e1..8df465ad6d 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. 
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.mad/src/Parameters_sm.cc b/epochX/cudacpp/gg_ttggg.mad/src/Parameters_sm.cc index 9d09eb6b62..64fc3fea62 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/gg_ttggg.mad/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.mad/src/Parameters_sm.h b/epochX/cudacpp/gg_ttggg.mad/src/Parameters_sm.h index 6b32c66b9b..b6568d3761 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index e8d8232be5..33bae20142 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005519866943359375  +DEBUG: model prefixing takes 0.005532503128051758  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +155,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.893 s +1 processes with 1240 diagrams generated in 1.880 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT @@ -175,7 +175,7 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.604 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.540 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -183,7 +183,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.356 s +ALOHA: aloha creates 5 routines in 0.351 s VVV1 VVV1 FFV1 @@ -207,6 +207,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m13.085s -user 0m12.921s -sys 0m0.106s +real 0m15.959s +user 0m12.810s +sys 0m0.102s diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc index a67b74e5b7..30acce4afc 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.h b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.h index dc41720ca6..2565923dde 100644 --- a/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h index 9b946c21e1..8df465ad6d 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.sa/src/Parameters_sm.cc b/epochX/cudacpp/gg_ttggg.sa/src/Parameters_sm.cc index 9d09eb6b62..64fc3fea62 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/Parameters_sm.cc +++ b/epochX/cudacpp/gg_ttggg.sa/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.sa/src/Parameters_sm.h b/epochX/cudacpp/gg_ttggg.sa/src/Parameters_sm.h index 6b32c66b9b..b6568d3761 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/Parameters_sm.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 2338d395b7..89cb2749b0 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005399465560913086  +DEBUG: model prefixing takes 0.0057373046875  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. 
INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.079 s +8 processes with 40 diagrams generated in 0.078 s Total: 8 processes with 40 diagrams output madevent ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -198,7 +198,7 @@ INFO: Combined process g d~ > t t~ d~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -207,15 +207,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -224,23 +224,23 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s -Wrote files for 32 helas calls in 0.222 s +Wrote files for 32 helas calls in 0.217 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.147 s +ALOHA: aloha creates 2 routines in 0.144 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.133 s +ALOHA: aloha creates 4 routines in 0.132 s FFV1 FFV1 FFV1 @@ -266,6 +266,7 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f @@ -294,9 +295,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.957s -user 0m1.708s -sys 0m0.241s +real 0m4.915s +user 0m1.680s +sys 0m0.237s ************************************************************ * * * W E L C O M E to * @@ -309,7 +310,7 @@ sys 0m0.241s * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -342,7 +343,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat index dc07af3836..efb0752a31 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.1_lo_vect 2023-08-08 * +#* VERSION 3.5.2_lo_vect 2023-11-08 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. 
* #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gq_ttq.mad/MGMEVersion.txt b/epochX/cudacpp/gq_ttq.mad/MGMEVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gq_ttq.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gq_ttq.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MGVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index c526dd6b31..649c608210 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h index cdc2dc91ac..bf037c6c28 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f index 249a3e4e3c..6c1667bc0f 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index ba39cab867..ee1484ab56 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -231,7 +231,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f index e6d01dad0b..bd8e2f143a 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -333,7 +333,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index 8d92e4e769..930da28159 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h index a90abc4ab4..0f49f5247b 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. 
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f index f2eba72de7..c9b8759b60 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 5ec9701b78..62c235de64 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -231,7 +231,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f index 7a2e329e64..4c05be74a0 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -333,7 +333,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/genps.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/genps.f index fe9c61504b..c00e33d954 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/genps.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/genps.f @@ -1877,12 +1877,12 @@ double precision function get_channel_cut(p, config) d1 = iforest(1, -i, config) d2 = iforest(2, -i, config) do j=0,3 - if (d1.gt.0.and.d1.le.2) then + if (d1.gt.0.and.d1.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d1) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d1) endif - if (d2.gt.0.and.d2.le.2) then + if (d2.gt.0.and.d2.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d2) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d2) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/__init__.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/__init__.py index 16e60d8182..0d17042f0d 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/__init__.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/__init__.py @@ -26,6 +26,7 @@ class aMCatNLOError(MadGraph5Error): import os import logging import time +pjoin = os.path.join #Look for basic file position MG5DIR and MG4DIR MG5DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py index ef1bf58979..3995ce8109 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/banner.py @@ -2704,7 +2704,8 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - + elif issubclass(finput, RunCard): + target_class = finput else: return None @@ -2968,11 +2969,12 @@ def write(self, output_file, template=None, python_template=False, if python_template and not to_write: import string if self.blocks: - text = string.Template(text) mapping = {} for b in self.blocks: mapping[b.name] = b.get_template(self) - text = text.substitute(mapping) + if "$%s" % b.name not in text: + text += "\n$%s\n" % b.name + text = string.Template(text).substitute(mapping) if not self.list_parameter: text = text % self @@ -4875,6 +4877,9 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): continue break + if proc_characteristic['ninitial'] == 1: + self['SDE_strategy'] =1 + if 'MLM' in proc_characteristic['limitations']: if self['dynamical_scale_choice'] == -1: self['dynamical_scale_choice'] = 3 @@ -5940,7 +5945,7 @@ def default_setup(self): self.add_param("CheckCycle", 3) self.add_param("MaxAttempts", 10) self.add_param("ZeroThres", 1e-9) - self.add_param("OSThres", 1.0e-13) + self.add_param("OSThres", 1.0e-8) self.add_param("DoubleCheckHelicityFilter", True) self.add_param("WriteOutFilters", True) self.add_param("UseLoopFilter", False) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py index 14c7f310dc..87cb4b88df 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py @@ -4906,6 +4906,7 @@ def __init__(self, question, cards=[], from_banner=None, banner=None, mode='auto self.load_default() self.define_paths(**opt) self.last_editline_pos = 0 + self.update_dependent_done = False if 'allow_arg' not in opt or not opt['allow_arg']: # add some mininal content for this: @@ 
-6585,7 +6586,9 @@ def postcmd(self, stop, line): self.check_card_consistency() if self.param_consistency: try: - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) + self.update_dependent_done = False except MadGraph5Error as error: if 'Missing block:' in str(error): self.fail_due_to_format +=1 @@ -6638,6 +6641,8 @@ def do_update(self, line, timer=0): self.update_dependent(self.mother_interface, self.me_dir, self.param_card, self.paths['param'], timer, run_card=self.run_card, lhapdfconfig=self.lhapdf) + self.update_dependent_done = True + elif args[0] == 'missing': self.update_missing() @@ -6717,12 +6722,13 @@ class TimeOutError(Exception): def handle_alarm(signum, frame): raise TimeOutError signal.signal(signal.SIGALRM, handle_alarm) + if timer: - signal.alarm(timer) log_level=30 else: log_level=20 + if run_card: as_for_pdf = {'cteq6_m': 0.118, 'cteq6_d': 0.118, @@ -6781,6 +6787,10 @@ def handle_alarm(signum, frame): logger.log(log_level, "update the strong coupling value (alpha_s) to the value from the pdf selected: %s", as_for_pdf[pdlabel]) modify = True + if timer: + signal.alarm(timer) + + # Try to load the model in the limited amount of time allowed try: model = mecmd.get_model() @@ -6909,7 +6919,8 @@ def check_block(self, blockname): def check_answer_consistency(self): """function called if the code reads a file""" self.check_card_consistency() - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) def help_set(self): '''help message for set''' diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py index a88d60b282..5fd170d18d 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py @@ -157,12 +157,16 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - try: + if stdout: nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - except Exception as error: - misc.sprint(stdout, 'no channel or error for %s' % Pdir) - continue - + else: + for matrix_file in misc.glob('matrix*orig.f', Pdir): + files.cp(matrix_file, matrix_file.replace('orig','optim')) + P_zero_result.append(Pdir) + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) + continue # bypass bad process + self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') @@ -183,11 +187,13 @@ def get_helicity(self, to_submit=True, clean=True): #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts (stdout, _) = p.communicate(" ".encode()) stdout = stdout.decode('ascii',errors='ignore') - if os.path.exists(pjoin(self.me_dir,'error')): + if os.path.exists(pjoin(self.me_dir, 'error')): raise Exception(pjoin(self.me_dir,'error')) # note a continue is not enough here, we have in top to link # the matrixX_optim.f to matrixX_orig.f to let the code to work # after this error. 
+ # for matrix_file in misc.glob('matrix*orig.f', Pdir): + # files.cp(matrix_file, matrix_file.replace('orig','optim')) if 'no events passed cuts' in stdout: raise Exception diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py index c9d1c7706a..0b849330ef 100644 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py @@ -57,7 +57,7 @@ def reset_simd(self, old_value, new_value, name): return Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - + def plugin_input(self, finput): return @@ -79,7 +79,7 @@ def check_validity(self): self['sde_strategy'] = 1 if self['hel_recycling']: self['hel_recycling'] = False - + class GPURunCard(CPPRunCard): def default_setup(self): super(CPPRunCard, self).default_setup() diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py index d722702891..853aabc98a 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py @@ -1,4 +1,4 @@ -################################################################################ +############################################################################### # # Copyright (c) 2011 The MadGraph5_aMC@NLO Development team and Contributors # @@ -3675,7 +3675,7 @@ def do_refine(self, line): devnull.close() ############################################################################ - def do_combine_iteration(self, line): + def do_comine_iteration(self, line): """Not in help: Combine a given iteration combine_iteration Pdir Gdir S|R step S is for survey R is for refine @@ -3703,7 +3703,7 @@ def do_combine_iteration(self, line): ############################################################################ def do_combine_events(self, line): """Advanced commands: Launch combine events""" - + start=time.time() args = self.split_arg(line) start = time.time() # Check argument's validity @@ -3798,9 +3798,7 @@ def do_combine_events(self, line): self.correct_bias() elif self.run_card['custom_fcts']: self.correct_bias() - - logger.info("combine events done in %s", time.time()-start) - + logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') @@ -7368,7 +7366,7 @@ def wait_monitoring(Idle, Running, Done): import optparse # Get the directory of the script real path (bin) # and add it to the current PYTHONPATH - root_path = os.path.dirname(os.path.dirname(os.path.realpath( __file__ ))) + #root_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath( __file__ )))) sys.path.insert(0, root_path) class MyOptParser(optparse.OptionParser): @@ -7411,7 +7409,13 @@ def error(self, msg=''): import logging.config # Set logging level according to the logging level given by options #logging.basicConfig(level=vars(logging)[options.logging]) + import internal import internal.coloring_logging + # internal.file = XXX/bin/internal/__init__.py + # => need three dirname to get XXX + # we use internal to have any issue with pythonpath finding the wrong file + me_dir = os.path.dirname(os.path.dirname(os.path.dirname(internal.__file__))) + print("me_dir is", me_dir) try: if __debug__ and options.logging == 'INFO': options.logging = 'DEBUG' @@ -7419,7 +7423,8 @@ def error(self, msg=''): 
level = int(options.logging) else: level = eval('logging.' + options.logging) - logging.config.fileConfig(os.path.join(root_path, 'internal', 'me5_logging.conf')) + log_path = os.path.join(me_dir, 'bin', 'internal', 'me5_logging.conf') + logging.config.fileConfig(log_path) logging.root.setLevel(level) logging.getLogger('madgraph').setLevel(level) except: @@ -7433,9 +7438,9 @@ def error(self, msg=''): if '--web' in args: i = args.index('--web') args.pop(i) - cmd_line = MadEventCmd(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmd(me_dir, force_run=True) else: - cmd_line = MadEventCmdShell(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmdShell(me_dir, force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print(parser_error) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/misc.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/misc.py index d3fed3baa2..91cd3e5c22 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/misc.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/misc.py @@ -347,7 +347,7 @@ def tell(msg): if dependency=='ninja': if cmd.options['ninja'] in ['None',None,''] or\ (cmd.options['ninja'] == './HEPTools/lib' and not MG5dir is None and\ - which_lib(pjoin(MG5dir,cmd.options['ninja'],'libninja.a')) is None): + which_lib(pjoin(MG5dir,cmd.options['ninja'],'lib','libninja.a')) is None): tell("Installing ninja...") cmd.do_install('ninja') diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/shower_card.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/shower_card.py index c6d3948cc4..c344ea1b15 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/shower_card.py @@ -45,7 +45,9 @@ class ShowerCard(dict): false = ['.false.', 'f', 'false', '0'] logical_vars = ['ue_enabled', 'hadronize', 'b_stable', 'pi_stable', 'wp_stable', 'wm_stable', 'z_stable', 'h_stable', 'tap_stable', 'tam_stable', - 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td'] + 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td', + 'space_shower_me_corrections', 'time_shower_me_corrections', + 'time_shower_me_extended', 'time_shower_me_after_first'] string_vars = ['extralibs', 'extrapaths', 'includepaths', 'analyse'] for i in range(1,100): string_vars.append('dm_'+str(i)) @@ -82,7 +84,11 @@ class ShowerCard(dict): 'b_mass' : {'HERWIG6':'b_mass', 'PYTHIA6': 'b_mass', 'HERWIGPP': 'b_mass', 'PYTHIA8': 'b_mass'}, 'analyse' : {'HERWIG6':'hwuti', 'PYTHIA6':'pyuti', 'HERWIGPP':'hwpputi', 'PYTHIA8':'py8uti'}, 'qcut' : {'PYTHIA8':'qcut'}, - 'njmax' : {'PYTHIA8':'njmax'}} + 'njmax' : {'PYTHIA8':'njmax'}, + 'space_shower_me_corrections' : {'PYTHIA8':'space_shower_me_corrections'}, + 'time_shower_me_corrections' : {'PYTHIA8':'time_shower_me_corrections'}, + 'time_shower_me_extended' : {'PYTHIA8':'time_shower_me_extended'}, + 'time_shower_me_after_first' : {'PYTHIA8':'time_shower_me_after_first'}} stdhep_dict = {'HERWIG6':'mcatnlo_hwan_stdhep.o', 'PYTHIA6':'mcatnlo_pyan_stdhep.o'} diff --git a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h index 0dd5f20f71..cd4e6de668 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 
3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc index d5eda63ee0..c06dcbb252 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h index 0c77cf58f0..a6eb185434 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/gq_ttq.mad/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index ad74707ae9..16374bd28e 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -61,7 +61,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005573272705078125  +DEBUG: model prefixing takes 0.005791902542114258  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,7 +170,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.079 s +8 processes with 40 diagrams generated in 0.078 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT @@ -206,12 +206,12 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. -Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.144 s FFV1 FFV1 FFV1 @@ -228,6 +228,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/ DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.658s -user 0m0.590s -sys 0m0.062s +real 0m3.656s +user 0m0.594s +sys 0m0.059s diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc index 037662f7db..4965f393c5 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.h index cdc2dc91ac..bf037c6c28 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.h +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gu_ttxu/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc index 12179b9801..5024e8e239 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.h b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.h index a90abc4ab4..0f49f5247b 100644 --- a/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.h +++ b/epochX/cudacpp/gq_ttq.sa/SubProcesses/P1_Sigma_sm_gux_ttxux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h index 0dd5f20f71..cd4e6de668 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc index d5eda63ee0..c06dcbb252 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc +++ b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h index 0c77cf58f0..a6eb185434 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h +++ b/epochX/cudacpp/gq_ttq.sa/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt index 9d96566eb2..3b04fc3fb3 100644 --- a/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt +++ b/epochX/cudacpp/heft_gg_h.sa/CODEGEN_cudacpp_heft_gg_h_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -166,6 +166,6 @@ INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg DEBUG: 'Entering PLUGIN_ProcessExporter.finalize', self.in_madevent_mode, type(self) =  Entering PLUGIN_ProcessExporter.finalize False [output.py at line 206]  quit -real 0m0.429s +real 0m3.422s user 0m0.371s -sys 0m0.051s +sys 0m0.048s diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc index 6cc0be1461..1d59f8e3cf 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.h b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.h index d0312182d5..dbc5aa0e4e 100644 --- a/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.h +++ b/epochX/cudacpp/heft_gg_h.sa/SubProcesses/P1_Sigma_heft_gg_h/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h index a2e9b6a70c..eae9ff5242 100644 --- a/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h +++ b/epochX/cudacpp/heft_gg_h.sa/src/HelAmps_heft.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/heft_gg_h.sa/src/Parameters_heft.cc b/epochX/cudacpp/heft_gg_h.sa/src/Parameters_heft.cc index fde65d5571..e5442756b1 100644 --- a/epochX/cudacpp/heft_gg_h.sa/src/Parameters_heft.cc +++ b/epochX/cudacpp/heft_gg_h.sa/src/Parameters_heft.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/heft_gg_h.sa/src/Parameters_heft.h b/epochX/cudacpp/heft_gg_h.sa/src/Parameters_heft.h index d1a451b2c3..790485fee0 100644 --- a/epochX/cudacpp/heft_gg_h.sa/src/Parameters_heft.h +++ b/epochX/cudacpp/heft_gg_h.sa/src/Parameters_heft.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index bb2844f553..8b6ca99446 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect 2023-08-08 * +* VERSION 3.5.2_lo_vect 2023-11-08 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -61,7 +61,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.00551915168762207  +DEBUG: model prefixing takes 0.00538325309753418  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -212,7 +212,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.137 s +13 processes with 76 diagrams generated in 0.134 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,7 +378,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.844 s +65 processes with 1119 diagrams generated in 1.811 s Total: 83 processes with 1202 diagrams output madevent ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=16384 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT @@ -497,7 +497,7 @@ INFO: Combined process d d~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED INFO: Combined process s s~ > t t~ WEIGHTED<=2 with process u u~ > t t~ WEIGHTED<=2 INFO: Creating files in directory P2_gg_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -506,15 +506,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg INFO: Creating files in directory P2_gg_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -523,15 +523,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  1 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux INFO: Creating files in directory P2_gu_ttxgu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . 
FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -540,15 +540,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  2 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu INFO: Creating files in directory P2_gux_ttxgux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -557,15 +557,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  3 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux INFO: Creating files in directory P2_uux_ttxgg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -574,15 +574,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  DEBUG: subproc_number =  4 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg INFO: Creating files in directory P1_gg_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -591,15 +591,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  DEBUG: subproc_number =  5 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg INFO: Creating files in directory P2_uu_ttxuu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -608,15 +608,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  6 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu INFO: Creating files in directory P2_uux_ttxuux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -625,15 +625,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  7 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux INFO: Creating files in directory P2_uxux_ttxuxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -642,15 +642,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  DEBUG: subproc_number =  8 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux INFO: Creating files in directory P2_uc_ttxuc DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -659,15 +659,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  9 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc INFO: Creating files in directory P2_uux_ttxccx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -676,15 +676,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  10 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx INFO: Creating files in directory P2_ucx_ttxucx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -693,15 +693,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  11 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx INFO: Creating files in directory P2_uxcx_ttxuxcx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -710,15 +710,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  DEBUG: subproc_number =  12 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx INFO: Creating files in directory P1_gu_ttxu DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -727,15 +727,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  13 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu INFO: Creating files in directory P1_gux_ttxux DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -744,15 +744,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  14 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux INFO: Creating files in directory P1_uux_ttxg DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -761,15 +761,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  DEBUG: subproc_number =  15 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg INFO: Creating files in directory P0_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -778,15 +778,15 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  16 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx INFO: Creating files in directory P0_uux_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6226]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -795,21 +795,21 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. DEBUG: config_map =  [1] [export_cpp.py at line 711]  DEBUG: subproc_number =  17 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1862]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  16384 True 16384 [export_v4.py at line 1872]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -Generated helas calls for 18 subprocesses (372 diagrams) in 1.298 s -Wrote files for 810 helas calls in 3.297 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.267 s +Wrote files for 810 helas calls in 3.215 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.339 s +ALOHA: aloha creates 5 routines in 0.333 s DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 197]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -817,7 +817,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 
routines in 0.315 s +ALOHA: aloha creates 10 routines in 0.312 s VVV1 VVV1 FFV1 @@ -850,6 +850,7 @@ patching file Source/genps.inc patching file Source/makefile patching file SubProcesses/makefile patching file bin/internal/gen_ximprove.py +Hunk #1 succeeded at 391 (offset 6 lines). patching file bin/internal/madevent_interface.py DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file auto_dsig1.f @@ -1028,9 +1029,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m8.967s -user 0m8.408s -sys 0m0.506s +real 0m11.764s +user 0m8.242s +sys 0m0.480s ************************************************************ * * * W E L C O M E to * @@ -1043,7 +1044,7 @@ sys 0m0.506s * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -1076,7 +1077,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.1_lo_vect * +* VERSION 3.5.2_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat index 944298ae75..c0b1a2fd98 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.1_lo_vect 2023-08-08 * +#* VERSION 3.5.2_lo_vect 2023-11-08 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/pp_tt012j.mad/MGMEVersion.txt b/epochX/cudacpp/pp_tt012j.mad/MGMEVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/pp_tt012j.mad/MGMEVersion.txt +++ b/epochX/cudacpp/pp_tt012j.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MGVersion.txt index 1c1a95761b..85c67c3554 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.1_lo_vect \ No newline at end of file +3.5.2_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc index 0317bbc95a..30815cd085 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h index ecd2d1364e..448175be9d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f index dce732e252..963d8ec072 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f index a48f6997f3..d4e2956b18 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -216,7 +216,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f index d803e4f19f..5b3b723e59 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -301,7 +301,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index 75110e8fec..fa46e42b8f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h index 3d5ca9d556..e166fa1652 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f index 3d59efb411..2cc5a2026a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f index f9147f699e..2344ddbe81 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 
3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -234,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f index 4c21758744..1dea73e826 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -304,7 +304,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index f7f5899260..5e2bf0d19a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h index 9f559fe3ae..37d6ebe981 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f index d528b1d2f0..dd4cd3a0c2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index 110e204c24..e28575ead8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -216,7 +216,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f index bf665ff6e0..a885b7fde3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -317,7 +317,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 90a457ac40..3b6b1a6c16 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. 
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h index cdc2dc91ac..bf037c6c28 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f index 249a3e4e3c..6c1667bc0f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index ba39cab867..ee1484ab56 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -231,7 +231,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f index d61f0e1a21..b7d8649204 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -320,7 +320,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index 9a73b3ed94..eb62f13990 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h index a90abc4ab4..0f49f5247b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f index f2eba72de7..c9b8759b60 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 5ec9701b78..62c235de64 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -231,7 +231,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f index b082becd2a..8a699645cd 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -320,7 +320,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index dc1a3e9d26..c47ef64ec8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h index 06af307caa..f8bdb38aee 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. 
Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f index 408403e5d9..628e0d8092 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f index 842b1c72d4..b66a887225 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -234,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f index 265f6006db..7bc63ee8a4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -320,7 +320,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc index cbc45ff652..0cbb15fba7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h index a41aa7611a..9f43559181 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f index c23550e9b7..84ee7e5b85 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f index 4e2bfe85ab..aa73f64dba 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -216,7 +216,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f index c8fbb1cc8b..46e6ff0da7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -349,7 +349,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc index 5723ed5665..d9f2d09952 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h index 95f4bf6912..f26b60c5bb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f index d196e8ed65..abb75a925b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f index e5a0390c47..d6bf2155ff 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -228,7 +228,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f index 4f966fab6d..fabc6786d3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -352,7 +352,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index b8f74ecafe..0d1c319939 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. 
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h index a54b0bb8fe..853175b477 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f index bc732da055..94fe1937c3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f index 309be94a99..50c024adc3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -231,7 +231,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f index c03cebacb0..210884dccf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -352,7 +352,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index 2495941a73..8e3985f427 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h index d31dd972a9..e60cb5b6d7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f index 399b68be58..3e0e30af23 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 
3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f index 23d82657bf..e639ee4c34 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -231,7 +231,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f index 39422dc34c..a8c5f11ae3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -352,7 +352,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index 529477ff3e..22398e7ab4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h index 4f557f24ab..5329710b87 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f index da207359fc..94cfdd1487 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f index 4d12dfeade..37f4a35577 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -240,7 +240,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f index 9e27e48c99..66b1820c10 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -354,7 +354,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index e54a24ea57..3955de70dd 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h index 1818cf79ed..391789dc81 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f index cfd6a270b5..5ce83d5f12 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f index 5bac32b00a..ea0697602c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -266,7 +266,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f index 6bdc5db576..9403b67a1a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -360,7 +360,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc index 8638bbefa2..bfc3d0809f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h index 41e15f6ad0..2d95f4b170 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. 
Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f index efdae70d19..44e8c9d920 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f index 50c16edaac..302d0eda9c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -234,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f index 8b2cf62531..f51744ae5d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -352,7 +352,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index c071cc6900..222800dcfd 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h index b93bb3909d..14490d782f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f index 72e76f54e4..ab270fe554 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f index 577a8d9c54..e9b4ddc613 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -266,7 +266,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f index c5a7b6787c..f93b850d5f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -360,7 +360,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index 2eb6b491fa..ef9407041b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h index 2f4866b6ca..1543c29649 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f index 4b08b69f90..f5ef1f7b43 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f index f4e431c5ce..83e40fb02c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -234,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f index a843f4656a..9996fdea2d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -352,7 +352,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index 8682128442..1aa88699db 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. 
Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h index dbd5b60487..58cece5c62 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f index 3e29e25982..867eb95566 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f index 123a3ae00e..ae43656176 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -234,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f index 6d8f6b4ed8..205e3daf83 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -352,7 +352,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index 7d3141cfc4..5f356a519e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h index f92e527895..6bd3135c3c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f index 44da6cd9ce..8ded31027d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 
3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f index a4cb748b19..7ce014f5f5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -240,7 +240,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f index 53f591633e..dfbec413a8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -354,7 +354,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc index 6ec302f68b..af04d58c3e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h index 53c3b7149b..4e53fa1250 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f index 43ccdff1e1..2acdc960db 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f index 3a3ed05151..115e19c70e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -234,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f index dce10b9553..392b30a39f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -352,7 +352,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/genps.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/genps.f index fe9c61504b..c00e33d954 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/genps.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/genps.f @@ -1877,12 +1877,12 @@ double precision function get_channel_cut(p, config) d1 = iforest(1, -i, config) d2 = iforest(2, -i, config) do j=0,3 - if (d1.gt.0.and.d1.le.2) then + if (d1.gt.0.and.d1.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d1) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d1) endif - if (d2.gt.0.and.d2.le.2) then + if (d2.gt.0.and.d2.le.nincoming) then ptemp(j,-i) = ptemp(j,-i) - ptemp(j, d2) else ptemp(j,-i) = ptemp(j,-i)+ptemp(j, d2) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/__init__.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/__init__.py index 16e60d8182..0d17042f0d 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/__init__.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/__init__.py @@ -26,6 +26,7 @@ class aMCatNLOError(MadGraph5Error): import os import logging import time +pjoin = os.path.join #Look for basic file position MG5DIR and MG4DIR MG5DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py index ef1bf58979..3995ce8109 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py @@ -2704,7 +2704,8 @@ def __new__(cls, finput=None, **opt): except Exception as error: import launch_plugin target_class = launch_plugin.RunCard - + elif issubclass(finput, RunCard): + target_class = finput else: return None @@ -2968,11 +2969,12 @@ def write(self, output_file, template=None, python_template=False, if python_template and not to_write: import string if self.blocks: - text = string.Template(text) mapping = {} for b in self.blocks: mapping[b.name] = b.get_template(self) - text = text.substitute(mapping) + if "$%s" % b.name not in text: + text += "\n$%s\n" % b.name + text = string.Template(text).substitute(mapping) if not self.list_parameter: text = text % self @@ -4875,6 +4877,9 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): continue break + if proc_characteristic['ninitial'] == 1: + self['SDE_strategy'] =1 + if 'MLM' in proc_characteristic['limitations']: if self['dynamical_scale_choice'] == -1: self['dynamical_scale_choice'] = 3 @@ -5940,7 +5945,7 @@ def default_setup(self): self.add_param("CheckCycle", 3) self.add_param("MaxAttempts", 10) self.add_param("ZeroThres", 1e-9) - self.add_param("OSThres", 1.0e-13) + self.add_param("OSThres", 1.0e-8) self.add_param("DoubleCheckHelicityFilter", True) self.add_param("WriteOutFilters", True) self.add_param("UseLoopFilter", False) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py index 14c7f310dc..87cb4b88df 100755 --- 
a/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py @@ -4906,6 +4906,7 @@ def __init__(self, question, cards=[], from_banner=None, banner=None, mode='auto self.load_default() self.define_paths(**opt) self.last_editline_pos = 0 + self.update_dependent_done = False if 'allow_arg' not in opt or not opt['allow_arg']: # add some mininal content for this: @@ -6585,7 +6586,9 @@ def postcmd(self, stop, line): self.check_card_consistency() if self.param_consistency: try: - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) + self.update_dependent_done = False except MadGraph5Error as error: if 'Missing block:' in str(error): self.fail_due_to_format +=1 @@ -6638,6 +6641,8 @@ def do_update(self, line, timer=0): self.update_dependent(self.mother_interface, self.me_dir, self.param_card, self.paths['param'], timer, run_card=self.run_card, lhapdfconfig=self.lhapdf) + self.update_dependent_done = True + elif args[0] == 'missing': self.update_missing() @@ -6717,12 +6722,13 @@ class TimeOutError(Exception): def handle_alarm(signum, frame): raise TimeOutError signal.signal(signal.SIGALRM, handle_alarm) + if timer: - signal.alarm(timer) log_level=30 else: log_level=20 + if run_card: as_for_pdf = {'cteq6_m': 0.118, 'cteq6_d': 0.118, @@ -6781,6 +6787,10 @@ def handle_alarm(signum, frame): logger.log(log_level, "update the strong coupling value (alpha_s) to the value from the pdf selected: %s", as_for_pdf[pdlabel]) modify = True + if timer: + signal.alarm(timer) + + # Try to load the model in the limited amount of time allowed try: model = mecmd.get_model() @@ -6909,7 +6919,8 @@ def check_block(self, blockname): def check_answer_consistency(self): """function called if the code reads a file""" self.check_card_consistency() - self.do_update('dependent', timer=20) + if not self.update_dependent_done: + self.do_update('dependent', timer=20) def help_set(self): '''help message for set''' diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py index a88d60b282..5fd170d18d 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py @@ -157,12 +157,16 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') - try: + if stdout: nb_channel = max([math.floor(float(d)) for d in stdout.split()]) - except Exception as error: - misc.sprint(stdout, 'no channel or error for %s' % Pdir) - continue - + else: + for matrix_file in misc.glob('matrix*orig.f', Pdir): + files.cp(matrix_file, matrix_file.replace('orig','optim')) + P_zero_result.append(Pdir) + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) + continue # bypass bad process + self.cmd.compile(['madevent_forhel'], cwd=Pdir) if not os.path.exists(pjoin(Pdir, 'madevent_forhel')): raise Exception('Error make madevent_forhel not successful') @@ -183,11 +187,13 @@ def get_helicity(self, to_submit=True, clean=True): #sym_input = "%(points)d %(iterations)d %(accuracy)f \n" % self.opts (stdout, _) = p.communicate(" ".encode()) stdout = stdout.decode('ascii',errors='ignore') - if os.path.exists(pjoin(self.me_dir,'error')): + if os.path.exists(pjoin(self.me_dir, 'error')): raise Exception(pjoin(self.me_dir,'error')) # note a continue is not enough here, we have 
in top to link # the matrixX_optim.f to matrixX_orig.f to let the code to work # after this error. + # for matrix_file in misc.glob('matrix*orig.f', Pdir): + # files.cp(matrix_file, matrix_file.replace('orig','optim')) if 'no events passed cuts' in stdout: raise Exception diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py index c9d1c7706a..0b849330ef 100644 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py @@ -57,7 +57,7 @@ def reset_simd(self, old_value, new_value, name): return Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source') subprocess.call(['make', 'cleanavx'], cwd=Sourcedir, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - + def plugin_input(self, finput): return @@ -79,7 +79,7 @@ def check_validity(self): self['sde_strategy'] = 1 if self['hel_recycling']: self['hel_recycling'] = False - + class GPURunCard(CPPRunCard): def default_setup(self): super(CPPRunCard, self).default_setup() diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py index d722702891..853aabc98a 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py @@ -1,4 +1,4 @@ -################################################################################ +############################################################################### # # Copyright (c) 2011 The MadGraph5_aMC@NLO Development team and Contributors # @@ -3675,7 +3675,7 @@ def do_refine(self, line): devnull.close() ############################################################################ - def do_combine_iteration(self, line): + def do_comine_iteration(self, line): """Not in help: Combine a given iteration combine_iteration Pdir Gdir S|R step S is for survey R is for refine @@ -3703,7 +3703,7 @@ def do_combine_iteration(self, line): ############################################################################ def do_combine_events(self, line): """Advanced commands: Launch combine events""" - + start=time.time() args = self.split_arg(line) start = time.time() # Check argument's validity @@ -3798,9 +3798,7 @@ def do_combine_events(self, line): self.correct_bias() elif self.run_card['custom_fcts']: self.correct_bias() - - logger.info("combine events done in %s", time.time()-start) - + logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') @@ -7368,7 +7366,7 @@ def wait_monitoring(Idle, Running, Done): import optparse # Get the directory of the script real path (bin) # and add it to the current PYTHONPATH - root_path = os.path.dirname(os.path.dirname(os.path.realpath( __file__ ))) + #root_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath( __file__ )))) sys.path.insert(0, root_path) class MyOptParser(optparse.OptionParser): @@ -7411,7 +7409,13 @@ def error(self, msg=''): import logging.config # Set logging level according to the logging level given by options #logging.basicConfig(level=vars(logging)[options.logging]) + import internal import internal.coloring_logging + # internal.file = XXX/bin/internal/__init__.py + # => need three dirname to get XXX + # we use internal to have any issue with pythonpath finding the wrong file + me_dir = os.path.dirname(os.path.dirname(os.path.dirname(internal.__file__))) + print("me_dir is", me_dir) 
try: if __debug__ and options.logging == 'INFO': options.logging = 'DEBUG' @@ -7419,7 +7423,8 @@ def error(self, msg=''): level = int(options.logging) else: level = eval('logging.' + options.logging) - logging.config.fileConfig(os.path.join(root_path, 'internal', 'me5_logging.conf')) + log_path = os.path.join(me_dir, 'bin', 'internal', 'me5_logging.conf') + logging.config.fileConfig(log_path) logging.root.setLevel(level) logging.getLogger('madgraph').setLevel(level) except: @@ -7433,9 +7438,9 @@ def error(self, msg=''): if '--web' in args: i = args.index('--web') args.pop(i) - cmd_line = MadEventCmd(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmd(me_dir, force_run=True) else: - cmd_line = MadEventCmdShell(os.path.dirname(root_path),force_run=True) + cmd_line = MadEventCmdShell(me_dir, force_run=True) if not hasattr(cmd_line, 'do_%s' % args[0]): if parser_error: print(parser_error) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/misc.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/misc.py index d3fed3baa2..91cd3e5c22 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/misc.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/misc.py @@ -347,7 +347,7 @@ def tell(msg): if dependency=='ninja': if cmd.options['ninja'] in ['None',None,''] or\ (cmd.options['ninja'] == './HEPTools/lib' and not MG5dir is None and\ - which_lib(pjoin(MG5dir,cmd.options['ninja'],'libninja.a')) is None): + which_lib(pjoin(MG5dir,cmd.options['ninja'],'lib','libninja.a')) is None): tell("Installing ninja...") cmd.do_install('ninja') diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/shower_card.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/shower_card.py index c6d3948cc4..c344ea1b15 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/shower_card.py @@ -45,7 +45,9 @@ class ShowerCard(dict): false = ['.false.', 'f', 'false', '0'] logical_vars = ['ue_enabled', 'hadronize', 'b_stable', 'pi_stable', 'wp_stable', 'wm_stable', 'z_stable', 'h_stable', 'tap_stable', 'tam_stable', - 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td'] + 'mup_stable', 'mum_stable', 'is_4lep', 'is_bbar', 'combine_td', + 'space_shower_me_corrections', 'time_shower_me_corrections', + 'time_shower_me_extended', 'time_shower_me_after_first'] string_vars = ['extralibs', 'extrapaths', 'includepaths', 'analyse'] for i in range(1,100): string_vars.append('dm_'+str(i)) @@ -82,7 +84,11 @@ class ShowerCard(dict): 'b_mass' : {'HERWIG6':'b_mass', 'PYTHIA6': 'b_mass', 'HERWIGPP': 'b_mass', 'PYTHIA8': 'b_mass'}, 'analyse' : {'HERWIG6':'hwuti', 'PYTHIA6':'pyuti', 'HERWIGPP':'hwpputi', 'PYTHIA8':'py8uti'}, 'qcut' : {'PYTHIA8':'qcut'}, - 'njmax' : {'PYTHIA8':'njmax'}} + 'njmax' : {'PYTHIA8':'njmax'}, + 'space_shower_me_corrections' : {'PYTHIA8':'space_shower_me_corrections'}, + 'time_shower_me_corrections' : {'PYTHIA8':'time_shower_me_corrections'}, + 'time_shower_me_extended' : {'PYTHIA8':'time_shower_me_extended'}, + 'time_shower_me_after_first' : {'PYTHIA8':'time_shower_me_after_first'}} stdhep_dict = {'HERWIG6':'mcatnlo_hwan_stdhep.o', 'PYTHIA6':'mcatnlo_pyan_stdhep.o'} diff --git a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h index 9b946c21e1..8df465ad6d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h @@ -8,7 +8,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. 
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/src/Parameters_sm.cc b/epochX/cudacpp/pp_tt012j.mad/src/Parameters_sm.cc index 9d09eb6b62..64fc3fea62 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/Parameters_sm.cc +++ b/epochX/cudacpp/pp_tt012j.mad/src/Parameters_sm.cc @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/src/Parameters_sm.h b/epochX/cudacpp/pp_tt012j.mad/src/Parameters_sm.h index 6b32c66b9b..b6568d3761 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/Parameters_sm.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/Parameters_sm.h @@ -7,7 +7,7 @@ // Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.1_lo_vect, 2023-08-08 +// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index bcf56600ba..16028d3846 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/e CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,25 +15,25 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2023-11-03_19:52:13 +DATE: 2023-11-09_18:26:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6373s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6287s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6383s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6302s + [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1807s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1728s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1882s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1797s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.66E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4217s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3352s - [COUNTERS] Fortran MEs ( 1 ) : 0.0865s for 90112 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4280s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3388s + [COUNTERS] Fortran MEs ( 1 ) : 0.0892s for 90112 events => throughput is 1.01E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1919s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1852s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1893s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1830s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 
8192 events => throughput is 1.28E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4231s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3509s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0722s for 90112 events => throughput is 1.25E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4144s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3434s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0710s for 90112 events => throughput is 1.27E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.217666e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.246747e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.241611e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.254814e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1865s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1826s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1854s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1814s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0040s for 8192 events => throughput is 2.06E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3926s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3476s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0451s for 90112 events => throughput is 2.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3433s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0440s for 90112 events => throughput is 2.05E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.991197e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.008841e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.990100e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.041604e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1864s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1832s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1821s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1790s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3800s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3465s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0335s for 90112 events => throughput is 2.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3764s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3431s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0333s for 90112 events => throughput is 2.71E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.603611e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.648221e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.718712e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.737599e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1833s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1805s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.93E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1828s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1800s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3774s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3449s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 90112 events => throughput is 2.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3409s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 90112 events => throughput is 2.84E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.713996e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.822405e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.775269e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.840653e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1890s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1855s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.34E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3894s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3496s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0398s for 90112 events => throughput is 2.26E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3458s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0387s for 90112 events => throughput is 2.33E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.190424e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.213684e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.183626e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.288308e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5997s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5992s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.68E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5941s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5936s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.56E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,8 +547,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813628E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7696s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7647s + [COUNTERS] PROGRAM TOTAL : 0.7643s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7594s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.85E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.173877e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.122558e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.893710e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.902108e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.716630e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.029032e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.387595e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.427964e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.739579e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.990174e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.929113e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.966232e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.693635e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.011562e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.118370e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.099952e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index ff3c2ae8d4..bed8731e5c 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,10 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -27,13 +28,12 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2023-11-03_19:52:30 +DATE: 2023-11-09_18:26:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6418s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6338s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6375s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6295s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1742s - [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1700s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4264s - [COUNTERS] Fortran Overhead 
( 0 ) : 0.3383s - [COUNTERS] Fortran MEs ( 1 ) : 0.0882s for 90112 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4168s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3310s + [COUNTERS] Fortran MEs ( 1 ) : 0.0858s for 90112 events => throughput is 1.05E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166087172673] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1909s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1845s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.27E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1876s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1813s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 8192 events => throughput is 1.31E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501907796603360E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4197s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3492s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0705s for 90112 events => throughput is 1.28E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4132s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3439s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0693s for 90112 events => throughput is 1.30E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.260485e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.290954e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.240620e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269110e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165570339780] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1824s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1799s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1773s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.11E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905322826635E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3464s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 90112 events => throughput is 3.24E+06 
events/s + [COUNTERS] PROGRAM TOTAL : 0.3696s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 90112 events => throughput is 3.33E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.182676e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.211958e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.343050e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.331194e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1914s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1892s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.72E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1846s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1823s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.66E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3767s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3513s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 90112 events => throughput is 3.55E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3689s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 90112 events => throughput is 3.59E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.496883e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.583243e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.660390e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.664821e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747165593922979] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1867s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1844s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1881s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1858s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.64E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501905316084181E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3763s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3515s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0248s for 90112 events => throughput is 3.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4013s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0269s for 90112 events => throughput is 3.35E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.562187e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.708142e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.601892e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.716354e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166440400542] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1875s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1852s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1876s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.69E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501908978565555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3774s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3519s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0255s for 90112 events => throughput is 3.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3791s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3532s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0259s for 90112 events => throughput is 3.47E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.223682e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.388042e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.583359e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.799218e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747166823487174] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5998s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.73E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5957s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5952s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.72E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501910542849674E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7713s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7665s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.87E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7596s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7551s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 90112 events => throughput is 1.97E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.583398e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.613080e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.881767e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.898284e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.997979e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.543811e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.043514e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.026187e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.954785e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.468953e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.219791e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.241582e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.299152e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.812787e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.462264e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.411277e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 7741c53b46..8b8c11aaf5 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_19:52:47 +DATE: 2023-11-09_18:26:42 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 3893 events (found 7395 events) - [COUNTERS] PROGRAM TOTAL : 0.6387s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6300s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6293s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6211s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169064681776] fbridge_mode=0 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1817s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1737s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1828s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1742s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.52E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,8 +109,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919904813656E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4238s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3373s + [COUNTERS] PROGRAM TOTAL : 0.4185s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3320s [COUNTERS] Fortran MEs ( 1 ) : 0.0865s for 90112 events => throughput is 1.04E+06 events/s *** (2-none) EXECUTE 
MADEVENT_CPP x1 (create events.lhe) *** @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211734] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1883s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1817s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4433s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0757s for 90112 events => throughput is 1.19E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4177s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3452s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0725s for 90112 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.177056e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.192297e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.187091e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.206668e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169074211728] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1964s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1922s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1831s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919915927155E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.4148s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3686s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0462s for 90112 events => throughput is 1.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3847s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3426s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0421s for 90112 events => throughput is 2.14E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.000126e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.077610e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.127276e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.127798e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1824s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.62E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3807s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3465s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0342s for 90112 events => throughput is 2.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3749s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3413s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0336s for 90112 events => throughput is 2.68E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.610232e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.567900e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.645393e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.786544e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1900s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.68E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1822s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1793s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.82E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3808s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3484s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0325s for 90112 events => throughput is 2.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3728s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3415s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0313s for 90112 events => throughput is 2.88E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.740865e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.787216e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.846547e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.802177e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169063975949] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.1860s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1826s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.46E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1827s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.33E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919908700741E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3896s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3515s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0381s for 90112 events => throughput is 2.36E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3808s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3437s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0371s for 90112 events => throughput is 2.43E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.142678e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.306669e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.406082e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.302969e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2175 [0.21747169066587257] fbridge_mode=1 [UNWEIGHT] Wrote 1611 events (found 1616 events) - [COUNTERS] PROGRAM TOTAL : 0.5998s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5952s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5947s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.67E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0915 [9.1501919911173610E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7721s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7672s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.85E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7615s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.88E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.181977e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.094813e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.926668e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.912678e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.726329e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.000800e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.399920e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.334730e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.694690e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.018486e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.877527e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.914438e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.708142e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.024074e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.118945e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129214e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 1c30dae812..824a8e25d5 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -2,8 +2,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_19:53:04 +DATE: 2023-11-09_18:26:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3686s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3264s - [COUNTERS] Fortran MEs ( 1 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3548s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3140s + [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3158s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2736s - [COUNTERS] Fortran MEs ( 1 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3094s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2683s + [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6988s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2402s - [COUNTERS] Fortran MEs ( 1 ) : 0.4586s for 90112 events => 
throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6956s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2429s + [COUNTERS] Fortran MEs ( 1 ) : 0.4528s for 90112 events => throughput is 1.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3516s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3139s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0377s for 8192 events => throughput is 2.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3445s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3078s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0367s for 8192 events => throughput is 2.23E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775372] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7148s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2977s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4171s for 90112 events => throughput is 2.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6787s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2659s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4128s for 90112 events => throughput is 2.18E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.143201e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.206364e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.178576e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.211188e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600102] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3233s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3012s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3133s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775379] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.5228s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2789s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2439s for 90112 events => throughput is 3.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4919s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2565s + [COUNTERS] 
CudaCpp MEs ( 2 ) : 0.2354s for 90112 events => throughput is 3.83E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.615837e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.806213e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.718240e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.795645e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3023s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2891s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 8192 events => throughput is 6.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2981s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2850s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.23E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4266s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2769s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1496s for 90112 events => throughput is 6.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3832s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2385s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1448s for 90112 events => throughput is 6.22E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.870487e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.053490e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.072305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.106690e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3006s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0119s for 8192 events => throughput is 6.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2943s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2825s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0117s for 8192 events => throughput is 6.97E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4026s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2691s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1335s for 90112 events => throughput is 6.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3653s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2365s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1287s for 90112 events => throughput is 7.00E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.610205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.704382e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.622254e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.799597e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3191s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2997s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0194s for 8192 events => throughput is 4.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3082s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2885s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.15E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.5054s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2826s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2228s for 90112 events => throughput is 4.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6624s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4291s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2333s for 90112 events => throughput is 3.86E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.911690e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.938387e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.045481e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.929754e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600109] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.7037s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7032s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6969s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6963s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.46E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775386] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6871s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6805s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.37E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6570s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6507s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.043596e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.071187e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.671088e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.692368e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.005777e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.183000e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.074802e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.074203e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.019573e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.195387e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.147636e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.150737e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.014036e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.203236e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.011683e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.040065e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 7edcebceb9..6ff403b879 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,12 +1,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z @@ -17,13 +17,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_19:53:30 +DATE: 2023-11-09_18:27:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3667s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3245s - [COUNTERS] Fortran MEs ( 1 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3494s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3093s + [COUNTERS] Fortran MEs ( 1 ) : 0.0401s for 8192 events => throughput is 2.04E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3249s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2811s - [COUNTERS] Fortran MEs ( 1 ) : 0.0438s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3068s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2663s + [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7464s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2748s - [COUNTERS] Fortran MEs ( 1 ) : 0.4716s for 90112 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6536s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2070s + [COUNTERS] Fortran MEs ( 1 ) : 0.4466s for 90112 events => throughput is 2.02E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create 
events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690706767555099] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3467s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3115s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0352s for 8192 events => throughput is 2.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3397s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3049s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0348s for 8192 events => throughput is 2.35E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782605295497] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6806s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2908s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3898s for 90112 events => throughput is 2.31E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6398s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2589s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3809s for 90112 events => throughput is 2.37E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.279168e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.342865e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.299428e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.331036e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690702885183541] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3091s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2943s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0148s for 8192 events => throughput is 5.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2992s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2845s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.59E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223778858016772] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4417s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2764s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1652s for 90112 events => throughput is 5.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4772s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1682s for 90112 events => throughput is 5.36E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.234141e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.225442e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.323283e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.299428e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2903s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2825s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3093s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3001s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.88E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3699s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2805s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0894s for 90112 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3166s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2317s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0848s for 90112 events => throughput is 1.06E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.010480e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.025673e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.003913e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.017812e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690694374060818] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2913s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2842s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0071s for 8192 events => throughput is 1.15E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2858s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2785s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223775951815753] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3439s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2627s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0812s for 90112 events => throughput is 1.11E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3072s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2282s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0790s for 90112 events => throughput is 1.14E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.090791e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.097760e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.092579e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.119253e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690698914467276] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2963s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2859s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0104s for 8192 events => throughput is 7.89E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2907s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2807s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 8192 events => throughput is 8.25E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223780273983500] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3867s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2714s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1153s for 90112 events => throughput is 7.81E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3509s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2397s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1112s for 90112 events => throughput is 8.10E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.366599e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.884299e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.487198e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.701504e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690703397697980] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.7024s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7018s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.52E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6955s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.51E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223786763175951] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6918s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6861s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 90112 events => throughput is 1.60E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6624s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6571s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.68E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.243778e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.111635e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.844714e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.880409e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.837802e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.143607e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.769339e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.762374e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.775138e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.140173e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.863954e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.866583e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.397746e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.685718e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.449606e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.400545e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 30dac17633..9b02995ca5 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -3,9 +3,9 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_19:53:55 +DATE: 2023-11-09_18:27:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 420 events (found 1577 events) - [COUNTERS] PROGRAM TOTAL : 0.3561s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3153s - [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3509s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3105s + [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708277600116] fbridge_mode=0 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3103s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2697s - [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3067s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2662s + [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782291775365] fbridge_mode=0 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6795s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2275s - [COUNTERS] Fortran MEs ( 1 ) : 0.4521s for 90112 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6580s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2117s + [COUNTERS] Fortran MEs ( 1 ) : 0.4462s for 90112 events => throughput is 2.02E+05 events/s *** 
(2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3522s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3138s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0384s for 8192 events => throughput is 2.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3460s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0379s for 8192 events => throughput is 2.16E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.7156s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2968s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4188s for 90112 events => throughput is 2.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6700s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2581s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4119s for 90112 events => throughput is 2.19E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.113023e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.182152e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.146418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.183502e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709601032026] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3199s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2989s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3147s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2941s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 8192 events => throughput is 3.96E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783635280988] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.5211s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2871s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2339s for 90112 events => throughput is 3.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4759s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2478s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2281s for 90112 events => throughput is 3.95E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.687467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.820026e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.724259e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.775419e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3043s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2913s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0129s for 8192 events => throughput is 6.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2965s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2837s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 8192 events => throughput is 6.38E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.4192s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2746s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1446s for 90112 events => throughput is 6.23E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3920s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2488s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1432s for 90112 events => throughput is 6.29E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.051901e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.159361e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.195854e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.220899e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.2995s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2875s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0120s for 8192 events => throughput is 6.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3061s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2938s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.65E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.3959s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2664s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1296s for 90112 events => throughput is 6.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3693s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2423s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1270s for 90112 events => throughput is 7.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.842430e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.912537e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.007264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.069074e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690709681138244] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.3146s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2960s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 8192 events => throughput is 4.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3109s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.75E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223783652032040] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.5378s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3142s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2236s for 90112 events => throughput is 4.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4629s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2551s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2078s for 90112 events => throughput is 4.34E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.894022e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.077933e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.946552e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.997576e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.69 [47.690708266690699] fbridge_mode=1 [UNWEIGHT] Wrote 434 events (found 1125 events) - [COUNTERS] PROGRAM TOTAL : 0.7067s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7061s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.46E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6985s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6979s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.40E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 46.22 [46.223782303744791] fbridge_mode=1 [UNWEIGHT] Wrote 1727 events (found 1732 events) - [COUNTERS] PROGRAM TOTAL : 1.6929s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6862s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 90112 events => throughput is 1.34E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6617s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6553s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.42E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.049753e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.060435e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.613651e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.608769e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.019403e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.186491e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.060699e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059369e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.995962e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.182441e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.142982e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.136921e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.026315e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.174632e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.022885e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.949461e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index d992721ecf..241597d591 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -6,8 +6,8 @@ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' @@ -16,7 +16,6 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' 
CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' @@ -24,6 +23,7 @@ make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_19:54:21 +DATE: 2023-11-09_18:28:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5463s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2264s - [COUNTERS] Fortran MEs ( 1 ) : 0.3199s for 8192 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5556s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2379s + [COUNTERS] Fortran MEs ( 1 ) : 0.3178s for 8192 events => throughput is 2.58E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5423s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2222s - [COUNTERS] Fortran MEs ( 1 ) : 0.3201s for 8192 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5351s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2203s + [COUNTERS] Fortran MEs ( 1 ) : 0.3148s for 8192 events => throughput is 2.60E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.9241s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4090s - [COUNTERS] Fortran MEs ( 1 ) : 3.5151s for 90112 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8579s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3886s + [COUNTERS] Fortran MEs ( 1 ) : 3.4692s for 90112 events => throughput is 2.60E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470791E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8783s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5509s - [COUNTERS] CudaCpp MEs ( 2 ) : 
0.3274s for 8192 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8596s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5355s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3241s for 8192 events => throughput is 2.53E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.3304s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7125s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6180s for 90112 events => throughput is 2.49E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.2563s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6842s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5721s for 90112 events => throughput is 2.52E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.563855e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.570949e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.539633e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.596498e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470777E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5609s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3903s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1705s for 8192 events => throughput is 4.80E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5542s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3858s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1684s for 8192 events => throughput is 4.87E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.4794s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5811s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8984s for 90112 events => throughput is 4.75E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.5019s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5803s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9216s for 90112 events => throughput is 4.69E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.820475e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.985717e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.874297e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.959096e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3928s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3073s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0854s for 8192 events => throughput is 9.59E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3840s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3011s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0829s for 8192 events => throughput is 9.88E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.4294s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4857s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9437s for 90112 events => throughput is 9.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3753s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4512s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9241s for 90112 events => throughput is 9.75E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.717012e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005162e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.756457e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000723e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3746s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2982s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3672s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2918s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0754s for 8192 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.3655s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5058s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8597s for 90112 events => throughput is 1.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2690s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4430s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8260s for 90112 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.094100e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111268e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.081248e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.117996e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470750E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4370s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3297s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1073s for 8192 events => throughput is 7.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4279s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3235s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1044s for 8192 events => throughput is 7.85E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655541E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.6869s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5079s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1790s for 90112 events => throughput is 7.64E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.6406s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4855s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1551s for 90112 events => throughput is 7.80E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.730653e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.832306e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.578143e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.896180e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6799s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6745s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6558s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6503s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655597E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8667s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0229s for 90112 events => throughput is 3.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8300s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8072s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.611230e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.613028e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.333105e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.229609e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.644038e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.871226e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.240451e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.236452e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.653799e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.869896e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.251657e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.247810e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.651458e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.851703e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.754830e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.745705e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index a339973536..9b1af7b411 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 + +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_19:55:03 +DATE: 2023-11-09_18:28:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5498s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2259s - [COUNTERS] Fortran MEs ( 1 ) : 0.3239s for 8192 events => throughput is 2.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5377s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2218s + [COUNTERS] Fortran MEs ( 1 ) : 0.3159s for 8192 events => throughput is 2.59E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5475s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2238s - [COUNTERS] Fortran MEs ( 1 ) : 0.3236s for 8192 events => throughput is 2.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5364s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2203s + [COUNTERS] Fortran MEs ( 1 ) : 0.3161s for 8192 events => throughput is 2.59E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.9843s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4284s - [COUNTERS] Fortran MEs ( 1 ) : 3.5559s for 90112 events => throughput is 2.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9162s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3985s + [COUNTERS] Fortran MEs ( 1 ) : 3.5176s for 90112 events => throughput is 2.56E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' 
./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349765248158E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8606s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5403s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3204s for 8192 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8412s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5250s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3162s for 8192 events => throughput is 2.59E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310860767768514E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.2449s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7120s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5329s for 90112 events => throughput is 2.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.1769s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6882s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4887s for 90112 events => throughput is 2.58E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.612374e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.661457e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.564881e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.666467e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196334183509370E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4339s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3327s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1012s for 8192 events => throughput is 8.10E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4080s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0948s for 8192 events => throughput is 8.64E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847547651041E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.5445s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4937s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0508s for 90112 events => throughput is 8.58E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5043s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4696s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0347s for 90112 events => throughput is 8.71E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.676181e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.800531e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.776153e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.815957e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3149s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2698s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0450s for 8192 events => throughput is 1.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3077s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2643s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.9260s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4419s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4841s for 90112 events => throughput is 1.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4206s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4793s for 90112 events => throughput is 1.88E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.865505e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.823286e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.837629e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.826868e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196330801117323E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3024s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2625s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0399s for 8192 events => throughput is 2.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2983s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2592s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0391s for 8192 events => throughput is 2.10E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310847326088065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8768s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4395s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4373s for 90112 events => throughput is 2.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8383s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4303s for 90112 events => throughput is 2.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.065719e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.101947e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.103855e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.126133e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196344079460428E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3291s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2768s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0523s for 8192 events => throughput is 1.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3220s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2717s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0503s for 8192 events => throughput is 1.63E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310857804286998E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.0319s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4573s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5745s for 90112 events => throughput is 1.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9888s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4251s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5637s for 90112 events => throughput is 1.60E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.561181e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.589248e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.560141e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.587181e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196349366365994E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6502s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6494s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0009s for 8192 events => throughput is 9.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6498s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6490s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.66E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310864949473968E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8485s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8390s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 90112 events => throughput is 9.41E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8143s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8048s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.51E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.292780e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.303788e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.862148e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.857184e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.637111e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.727610e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.443658e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.358085e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.653596e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.712514e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.515346e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.447022e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.504423e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.573590e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.620516e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621450e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 0d971ecde6..e102a98f20 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_19:55:40 +DATE: 2023-11-09_18:29:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 42 events (found 469 events) - [COUNTERS] PROGRAM TOTAL : 0.5559s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2317s - [COUNTERS] Fortran MEs ( 1 ) : 0.3242s for 8192 events => throughput is 2.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5406s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2214s + [COUNTERS] Fortran MEs ( 1 ) : 0.3192s for 8192 events => throughput is 2.57E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196357922470764E-002] fbridge_mode=0 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5470s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2235s - [COUNTERS] Fortran MEs ( 1 ) : 0.3235s for 8192 events => throughput is 2.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5369s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2199s + [COUNTERS] Fortran MEs ( 1 ) : 0.3170s for 8192 events => throughput is 2.58E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872077655555E-002] fbridge_mode=0 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 4.9714s - [COUNTERS] Fortran Overhead ( 0 ) : 
1.4219s - [COUNTERS] Fortran MEs ( 1 ) : 3.5496s for 90112 events => throughput is 2.54E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.8531s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3845s + [COUNTERS] Fortran MEs ( 1 ) : 3.4687s for 90112 events => throughput is 2.60E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358763382007E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.8877s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5532s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3345s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.8764s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5433s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3331s for 8192 events => throughput is 2.46E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872835011053E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 5.5218s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7614s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7604s for 90112 events => throughput is 2.40E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3597s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7144s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6453s for 90112 events => throughput is 2.47E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.427313e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.553245e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.496439e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.536593e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358804670396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.5567s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3892s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1675s for 8192 events => throughput is 4.89E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5484s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3827s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1657s for 8192 events => throughput is 4.94E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872836789727E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 3.4587s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5767s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8820s for 90112 events => throughput is 4.79E+04 events/s + 
[COUNTERS] PROGRAM TOTAL : 3.3712s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5426s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8286s for 90112 events => throughput is 4.93E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.968795e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.047917e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.959892e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.047714e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3947s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3085s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0862s for 8192 events => throughput is 9.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.3884s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3047s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0838s for 8192 events => throughput is 9.78E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.4507s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4993s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9513s for 90112 events => throughput is 9.47E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.3827s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4554s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9273s for 90112 events => throughput is 9.72E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.685236e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.985245e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.962312e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.974556e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358586501358E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.3728s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2978s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3689s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2951s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0738s for 8192 events => throughput is 1.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872708918333E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.2949s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4713s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8237s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2643s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4493s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8151s for 90112 events => throughput is 1.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.124486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.067840e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.126890e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.069793e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358757578441E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.4452s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3335s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1117s for 8192 events => throughput is 7.34E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4597s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3406s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1191s for 8192 events => throughput is 6.88E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872803699391E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 2.7230s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5103s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2127s for 90112 events => throughput is 7.43E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7582s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5206s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2377s for 90112 events => throughput is 7.28E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.441126e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.675272e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.419166e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.626790e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.0972 [9.7196358102981245E-002] fbridge_mode=1 [UNWEIGHT] Wrote 41 events (found 467 events) - [COUNTERS] PROGRAM TOTAL : 0.6594s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6540s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6533s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.50E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.08131 [8.1310872068634174E-002] fbridge_mode=1 [UNWEIGHT] Wrote 679 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 1.8526s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8298s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.94E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.8293s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.626262e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.635720e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.888012e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.120274e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.627419e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.835173e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.234131e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.231986e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.606969e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.818919e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.246896e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.242590e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.626608e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.805414e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.728520e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.724480e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index ba8c60f62e..408d8d380a 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' 
make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_19:56:23 +DATE: 2023-11-09_18:30:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.4568s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2815s - [COUNTERS] Fortran MEs ( 1 ) : 4.1753s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3928s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2780s + [COUNTERS] Fortran MEs ( 1 ) : 4.1147s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.5175s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2787s - [COUNTERS] Fortran MEs ( 1 ) : 4.2387s for 8192 events => throughput is 1.93E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3846s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2715s + [COUNTERS] Fortran MEs ( 1 ) : 4.1131s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 48.0120s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9235s - [COUNTERS] Fortran MEs ( 1 ) : 46.0885s for 90112 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.4210s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8888s + [COUNTERS] Fortran MEs ( 1 ) : 45.5321s for 90112 events => throughput is 1.98E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.7799s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4663s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3136s for 8192 events => throughput 
is 1.90E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6565s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4044s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2521s for 8192 events => throughput is 1.93E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 53.8857s - [COUNTERS] Fortran Overhead ( 0 ) : 6.1301s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.7557s for 90112 events => throughput is 1.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 52.9600s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0482s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.9118s for 90112 events => throughput is 1.92E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.953970e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.992604e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.950653e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989276e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352993E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.8228s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5191s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3037s for 8192 events => throughput is 3.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7458s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4719s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2739s for 8192 events => throughput is 3.60E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 29.7001s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1956s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.5045s for 90112 events => throughput is 3.53E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.7086s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1354s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.5732s for 90112 events => throughput is 3.52E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.686347e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697279e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.681541e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.704506e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.2608s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2531s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0077s for 8192 events => throughput is 8.13E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.2161s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2329s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9832s for 8192 events => throughput is 8.33E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.8799s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8850s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.9950s for 90112 events => throughput is 8.20E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.6675s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8433s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.8242s for 90112 events => throughput is 8.33E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.425637e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.632389e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.448586e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.597678e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.0082s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1311s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8771s for 8192 events => throughput is 9.34E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9728s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1137s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8591s for 8192 events => throughput is 9.54E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.4208s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7744s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.6464s for 90112 events => throughput is 9.34E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.1508s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7103s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4405s for 90112 events => throughput is 9.55E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.625406e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.863291e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.599473e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.840135e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311353009E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.4768s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3764s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1005s for 8192 events => throughput is 7.44E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5697s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4500s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1197s for 8192 events => throughput is 7.32E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421158E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 15.3944s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0207s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.3737s for 90112 events => throughput is 7.28E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.0835s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0464s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.0370s for 90112 events => throughput is 7.49E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.487218e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.677485e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.501573e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.683279e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352998E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8150s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7821s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 8192 events => throughput is 2.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8101s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7773s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0327s for 8192 events => throughput is 2.50E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421161E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7813s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4228s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3586s for 90112 events => throughput is 2.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7514s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3963s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3551s for 90112 events => throughput is 2.54E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.281506e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.285714e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.519229e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.505353e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.106281e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.109677e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.149081e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.147684e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.098811e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.113597e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.169654e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.164951e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.104970e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.106343e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.438070e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.432331e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 2c58d8399d..f4a809f68b 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
+ make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 - make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,11 +15,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_20:00:40 +DATE: 2023-11-09_18:34:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.4730s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2806s - [COUNTERS] Fortran MEs ( 1 ) : 4.1924s for 8192 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3944s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2768s + [COUNTERS] Fortran MEs ( 1 ) : 4.1176s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.4924s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2814s - [COUNTERS] Fortran MEs ( 1 ) : 4.2110s for 8192 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5146s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2747s + [COUNTERS] Fortran MEs ( 1 ) : 4.2399s for 8192 events => throughput is 1.93E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 48.0870s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9193s - [COUNTERS] Fortran MEs ( 1 ) : 46.1676s for 90112 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.3456s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8962s + [COUNTERS] Fortran MEs ( 1 ) : 45.4494s for 90112 events => throughput is 1.98E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' 
./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396490802749E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 8.5167s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3246s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1920s for 8192 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.3558s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2546s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1013s for 8192 events => throughput is 2.00E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774602344628E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 52.0969s - [COUNTERS] Fortran Overhead ( 0 ) : 5.9741s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.1228s for 90112 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 51.2827s + [COUNTERS] Fortran Overhead ( 0 ) : 5.9515s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.3313s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.036738e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.068073e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.035901e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.068719e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277389126121586E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.5366s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3964s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1402s for 8192 events => throughput is 7.18E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3795s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1203s for 8192 events => throughput is 7.31E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803771887543366E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 15.6834s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0490s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.6344s for 90112 events => throughput is 7.13E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.4928s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0115s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.4813s for 90112 events => throughput is 7.22E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.385848e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.470531e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.336063e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.461238e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.2706s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7693s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5013s for 8192 events => throughput is 1.63E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.2540s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7572s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4968s for 8192 events => throughput is 1.65E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.9611s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4052s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.5558s for 90112 events => throughput is 1.62E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.8987s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3780s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5207s for 90112 events => throughput is 1.63E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.671775e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.671559e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.674155e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.684139e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277390198115864E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.1460s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7047s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4413s for 8192 events => throughput is 1.86E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1397s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7044s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4354s for 8192 events => throughput is 1.88E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803774416711566E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 7.1917s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3395s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.8523s for 90112 events => throughput is 1.86E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.1254s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3176s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.8078s for 90112 events => throughput is 1.87E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.912795e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.932083e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.909696e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934934e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277396394633404E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 1.3662s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8206s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5456s for 8192 events => throughput is 1.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3342s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8031s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5312s for 8192 events => throughput is 1.54E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803777741065333E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 8.4389s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4516s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.9874s for 90112 events => throughput is 1.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.3073s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4189s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8884s for 90112 events => throughput is 1.53E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.534307e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.547676e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.484518e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.546957e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277400478491260E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.7763s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7549s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7736s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7522s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0215s for 8192 events => throughput is 3.81E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803779990154892E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.6207s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3864s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2342s for 90112 events => throughput is 3.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5981s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3628s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2353s for 90112 events => throughput is 3.83E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.582914e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.602414e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.939400e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.925045e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.483584e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.484752e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.662803e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.656642e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.489429e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.490786e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.631443e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.725267e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.463590e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.471712e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.531910e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.530964e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 7032d72896..9bed8b02d9 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
-make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_20:04:02 +DATE: 2023-11-09_18:37:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 48 events (found 439 events) - [COUNTERS] PROGRAM TOTAL : 4.4626s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2774s - [COUNTERS] Fortran MEs ( 1 ) : 4.1852s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3681s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2752s + [COUNTERS] Fortran MEs ( 1 ) : 4.0929s for 8192 events => throughput is 2.00E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277311352988E-004] fbridge_mode=0 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.4427s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2777s - [COUNTERS] Fortran MEs ( 1 ) : 4.1649s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.3422s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2703s + [COUNTERS] Fortran MEs ( 1 ) : 4.0719s for 8192 events => throughput is 2.01E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 
[1.5803725748421164E-004] fbridge_mode=0 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 48.3675s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9183s - [COUNTERS] Fortran MEs ( 1 ) : 46.4493s for 90112 events => throughput is 1.94E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.1722s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8864s + [COUNTERS] Fortran MEs ( 1 ) : 45.2857s for 90112 events => throughput is 1.99E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277432965013E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 9.0356s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6432s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3924s for 8192 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.6914s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4356s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2558s for 8192 events => throughput is 1.92E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725813026109E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 54.3841s - [COUNTERS] Fortran Overhead ( 0 ) : 6.2075s - [COUNTERS] CudaCpp MEs ( 2 ) : 48.1766s for 90112 events => throughput is 1.87E+03 events/s + [COUNTERS] PROGRAM TOTAL : 54.0099s + [COUNTERS] Fortran Overhead ( 0 ) : 6.0604s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.9495s for 90112 events => throughput is 1.88E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.891623e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.955214e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.924168e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962469e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277430934464E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 4.7893s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5036s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2857s for 8192 events => throughput is 3.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7696s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4653s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3043s for 8192 events => throughput is 3.56E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725816246317E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 29.4680s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1631s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.3048s for 90112 events => throughput is 3.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.7487s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0795s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.6692s for 90112 events => throughput is 3.65E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.703810e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.767280e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.713606e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.771152e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.2254s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2372s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9882s for 8192 events => throughput is 8.29E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.1933s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2225s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9708s for 8192 events => throughput is 8.44E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 13.9261s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8936s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.0324s for 90112 events => throughput is 8.17E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.6387s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8343s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.8044s for 90112 events => throughput is 8.34E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.503062e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.765902e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.519397e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.708316e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.0128s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1323s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8805s for 8192 events => throughput is 9.30E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.9610s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1075s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8536s for 8192 events => throughput is 9.60E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 12.3871s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7750s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.6121s for 90112 events => throughput is 9.37E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.2117s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7290s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4827s for 90112 events => throughput is 9.50E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.683983e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.837213e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.679001e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.813722e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277419683297E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 2.5013s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3879s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1134s for 8192 events => throughput is 7.36E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4206s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3396s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0809s for 8192 events => throughput is 7.58E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725810769321E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 15.3721s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0357s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.3363s for 90112 events => throughput is 7.30E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.8540s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9517s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9023s for 90112 events => throughput is 7.57E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.423059e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.664729e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.425324e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.661148e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0003628 [3.6277277293084707E-004] fbridge_mode=1 [UNWEIGHT] Wrote 59 events (found 420 events) - [COUNTERS] PROGRAM TOTAL : 0.8158s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7828s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0331s for 8192 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8068s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7745s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0323s for 8192 events => throughput is 2.54E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000158 [1.5803725738731039E-004] fbridge_mode=1 [UNWEIGHT] Wrote 207 events (found 1235 events) - [COUNTERS] PROGRAM TOTAL : 2.7756s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4130s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3626s for 90112 events => throughput is 2.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7640s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4053s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3587s for 90112 events => throughput is 2.51E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.294705e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.297023e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.524485e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.536170e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.113307e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.107408e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.174133e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.153471e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.119833e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.118088e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.183136e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.176343e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.103258e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.120562e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.436179e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.436751e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 568f545851..635bc8aab0 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' 
+make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_20:09:47 +DATE: 2023-11-09_18:43:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -51,14 +51,552 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' -ERROR! ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' failed -d R # 5 > -0.0 -0.0 -0.0 0.4 0.4 -d R # 6 > -0.0 -0.0 -0.0 -0.0 0.4 -s min # 3> 0.0119716.0 29929.0 29929.0 0.0 -s min # 4> 0.0 0.0 29929.0 29929.0 0.0 -s min # 5> 0.0 0.0 0.0 0.0 0.0 -s min # 6> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 3> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 4> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 5> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 6> 0.0 0.0 0.0 0.0 0.0 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 1 events (found 166 events) + [COUNTERS] PROGRAM TOTAL : 96.1979s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4594s + [COUNTERS] Fortran MEs ( 1 ) : 95.7384s for 8192 events => throughput is 8.56E+01 events/s + +*** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 96.1938s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4572s + [COUNTERS] Fortran MEs ( 1 ) : 95.7366s for 8192 events => throughput is 8.56E+01 events/s + +*** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 1056.1191s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1851s + [COUNTERS] Fortran MEs ( 1 ) : 1051.9341s for 90112 events => throughput is 8.57E+01 events/s + +*** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435831E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 221.2522s + [COUNTERS] Fortran Overhead ( 0 ) : 101.5022s + [COUNTERS] CudaCpp MEs ( 2 ) : 119.7500s for 8192 events => throughput is 6.84E+01 events/s + +*** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1693100945435831E-006) differ by less than 2E-14 (2.4424906541753444e-15) + +*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436158813953E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 1409.0435s + [COUNTERS] Fortran Overhead ( 0 ) : 99.0565s + [COUNTERS] CudaCpp MEs ( 2 ) : 1309.9869s for 90112 events => throughput is 6.88E+01 events/s + +*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813953E-007) differ by less than 2E-14 (1.1102230246251565e-15) + +*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.535302e+01 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.232167e+01 ) sec^-1 + +*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! 
Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435827E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 107.7463s + [COUNTERS] Fortran Overhead ( 0 ) : 49.5074s + [COUNTERS] CudaCpp MEs ( 2 ) : 58.2390s for 8192 events => throughput is 1.41E+02 events/s + +*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1693100945435827E-006) differ by less than 2E-14 (2.220446049250313e-15) + +*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 695.6110s + [COUNTERS] Fortran Overhead ( 0 ) : 53.4125s + [COUNTERS] CudaCpp MEs ( 2 ) : 642.1984s for 90112 events => throughput is 1.40E+02 events/s + +*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) + +*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.667754e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.672792e+02 ) sec^-1 + +*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 50.7441s + [COUNTERS] Fortran Overhead ( 0 ) : 23.3520s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.3921s for 8192 events => throughput is 2.99E+02 events/s + +*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1693100945435829E-006) differ by less than 2E-14 (2.4424906541753444e-15) + +*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 331.0298s + [COUNTERS] Fortran Overhead ( 0 ) : 27.1582s + [COUNTERS] CudaCpp MEs ( 2 ) : 303.8716s for 90112 events => throughput is 2.97E+02 events/s + +*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) + +*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.602735e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.607119e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 44.2409s + [COUNTERS] Fortran Overhead ( 0 ) : 20.3557s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.8852s for 8192 events => throughput is 3.43E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1693100945435829E-006) differ by less than 2E-14 (2.4424906541753444e-15) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! 
Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 289.3981s + [COUNTERS] Fortran Overhead ( 0 ) : 23.9732s + [COUNTERS] CudaCpp MEs ( 2 ) : 265.4249s for 90112 events => throughput is 3.40E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.111160e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.141844e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435829E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 45.6199s + [COUNTERS] Fortran Overhead ( 0 ) : 22.4059s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.2139s for 8192 events => throughput is 3.53E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (1.1693100945435802E-006) and cpp (1.1693100945435829E-006) differ by less than 2E-14 (2.4424906541753444e-15) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 283.6130s + [COUNTERS] Fortran Overhead ( 0 ) : 26.2046s + [COUNTERS] CudaCpp MEs ( 2 ) : 257.4085s for 90112 events => throughput is 3.50E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.763228e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.741992e+02 ) sec^-1 + +*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435838E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 4.1979s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1190s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0789s for 8192 events => throughput is 7.59E+03 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1693100945435838E-006) differ by less than 2E-14 (3.1086244689504383e-15) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436158813958E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 18.6565s + [COUNTERS] Fortran Overhead ( 0 ) : 6.7674s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8891s for 90112 events => throughput is 7.58E+03 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436158813958E-007) differ by less than 2E-14 (8.881784197001252e-16) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.527117e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.256112e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.240392e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.568765e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.279873e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.441727e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.268118e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.240204e+03 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index e844ee5b79..9a7b15ddba 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. 
make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_20:09:50 +DATE: 2023-11-09_20:10:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -51,14 +51,552 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' -ERROR! ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' failed -d R # 5 > -0.0 -0.0 -0.0 0.4 0.4 -d R # 6 > -0.0 -0.0 -0.0 -0.0 0.4 -s min # 3> 0.0119716.0 29929.0 29929.0 0.0 -s min # 4> 0.0 0.0 29929.0 29929.0 0.0 -s min # 5> 0.0 0.0 0.0 0.0 0.0 -s min # 6> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 3> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 4> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 5> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 6> 0.0 0.0 0.0 0.0 0.0 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 1 events (found 166 events) + [COUNTERS] PROGRAM TOTAL : 95.6517s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4537s + [COUNTERS] Fortran MEs ( 1 ) : 95.1980s for 8192 events => throughput is 8.61E+01 events/s + +*** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 95.5775s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4538s + [COUNTERS] Fortran MEs ( 1 ) : 95.1237s for 8192 events => throughput is 8.61E+01 events/s + +*** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 1055.1274s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1731s + [COUNTERS] Fortran MEs ( 1 ) : 1050.9543s for 90112 events => throughput is 8.57E+01 events/s + +*** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1694768344939596E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 198.8691s + [COUNTERS] Fortran Overhead ( 0 ) : 90.2534s + [COUNTERS] CudaCpp MEs ( 2 ) : 108.6157s for 8192 events => throughput is 7.54E+01 events/s + +*** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1694768344939596E-006) differ by less than 4E-4 (0.00014259686216466783) + +*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1361436150871156E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 1275.3669s + [COUNTERS] Fortran Overhead ( 0 ) : 93.9491s + [COUNTERS] CudaCpp MEs ( 2 ) : 1181.4178s for 90112 events => throughput is 7.63E+01 events/s + +*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361436150871156E-007) differ by less than 4E-4 (0.00014045934987350073) + +*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.083570e+01 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.167448e+01 ) sec^-1 + +*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1694765850750953E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 49.8398s + [COUNTERS] Fortran Overhead ( 0 ) : 23.4099s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.4299s for 8192 events => throughput is 3.10E+02 events/s + +*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1694765850750953E-006) differ by less than 4E-4 (0.00014238355787066226) + +*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! 
Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1361430669586527E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 320.3836s + [COUNTERS] Fortran Overhead ( 0 ) : 26.9904s + [COUNTERS] CudaCpp MEs ( 2 ) : 293.3932s for 90112 events => throughput is 3.07E+02 events/s + +*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430669586527E-007) differ by less than 4E-4 (0.00014020271663550687) + +*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.524011e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.562557e+02 ) sec^-1 + +*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1694764951124567E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 25.3018s + [COUNTERS] Fortran Overhead ( 0 ) : 11.8221s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.4798s for 8192 events => throughput is 6.08E+02 events/s + +*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (1.1693100945435802E-006) and cpp (1.1694764951124567E-006) differ by less than 4E-4 (0.00014230662135994443) + +*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1361430425531218E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 161.8530s + [COUNTERS] Fortran Overhead ( 0 ) : 15.4501s + [COUNTERS] CudaCpp MEs ( 2 ) : 146.4028s for 90112 events => throughput is 6.16E+02 events/s + +*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430425531218E-007) differ by less than 4E-4 (0.0001401912899885449) + +*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.213869e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.163477e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1694764951124567E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 22.2497s + [COUNTERS] Fortran Overhead ( 0 ) : 10.3581s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8916s for 8192 events => throughput is 6.89E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1694764951124567E-006) differ by less than 4E-4 (0.00014230662135994443) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1361430425531218E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 144.5243s + [COUNTERS] Fortran Overhead ( 0 ) : 14.0601s + [COUNTERS] CudaCpp MEs ( 2 ) : 130.4642s for 90112 events => throughput is 6.91E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361430425531218E-007) differ by less than 4E-4 (0.0001401912899885449) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.261245e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.179572e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! 
Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1694767957195604E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 22.8272s + [COUNTERS] Fortran Overhead ( 0 ) : 11.2607s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.5665s for 8192 events => throughput is 7.08E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1694767957195604E-006) differ by less than 4E-4 (0.00014256370209930758) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1361435956349820E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 143.3402s + [COUNTERS] Fortran Overhead ( 0 ) : 14.9961s + [COUNTERS] CudaCpp MEs ( 2 ) : 128.3441s for 90112 events => throughput is 7.02E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361435956349820E-007) differ by less than 4E-4 (0.00014045024240250115) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.537594e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.456699e+02 ) sec^-1 + +*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1694770708195000E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 2.4571s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9676s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4895s for 8192 events => throughput is 1.67E+04 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1694770708195000E-006) differ by less than 4E-4 (0.00014279896898083955) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1361443477565659E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 11.0626s + [COUNTERS] Fortran Overhead ( 0 ) : 5.6077s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4549s for 90112 events => throughput is 1.65E+04 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1361443477565659E-007) differ by less than 4E-4 (0.0001408023850304474) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.640892e+04 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.619412e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.340657e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.426283e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.326049e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.360046e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.341201e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.441486e+03 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 43bf5072f2..e947131942 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 
@@ -2,10 +2,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,17 +15,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_20:09:53 +DATE: 2023-11-09_21:16:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -51,14 +51,552 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' -ERROR! 
' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' failed -d R # 5 > -0.0 -0.0 -0.0 0.4 0.4 -d R # 6 > -0.0 -0.0 -0.0 -0.0 0.4 -s min # 3> 0.0119716.0 29929.0 29929.0 0.0 -s min # 4> 0.0 0.0 29929.0 29929.0 0.0 -s min # 5> 0.0 0.0 0.0 0.0 0.0 -s min # 6> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 3> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 4> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 5> 0.0 0.0 0.0 0.0 0.0 -xqcutij # 6> 0.0 0.0 0.0 0.0 0.0 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 1 events (found 166 events) + [COUNTERS] PROGRAM TOTAL : 95.6107s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4599s + [COUNTERS] Fortran MEs ( 1 ) : 95.1508s for 8192 events => throughput is 8.61E+01 events/s + +*** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100945435802E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 95.5844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4540s + [COUNTERS] Fortran MEs ( 1 ) : 95.1304s for 8192 events => throughput is 8.61E+01 events/s + +*** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436158813976E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 1052.2893s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1570s + [COUNTERS] Fortran MEs ( 1 ) : 1048.1323s for 90112 events => throughput is 8.60E+01 events/s + +*** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693101016896846E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 223.0748s + [COUNTERS] Fortran Overhead ( 0 ) : 103.3973s + [COUNTERS] CudaCpp MEs ( 2 ) : 119.6775s for 8192 events => throughput is 6.85E+01 events/s + +*** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1693101016896846E-006) differ by less than 2E-4 (6.111385175699979e-09) + +*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436275882778E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 1425.4469s + [COUNTERS] Fortran Overhead ( 0 ) : 107.1167s + [COUNTERS] CudaCpp MEs ( 2 ) : 1318.3302s for 90112 events => throughput is 6.84E+01 events/s + +*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436275882778E-007) differ by less than 2E-4 (5.48115042242614e-09) + +*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.990567e+01 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.033316e+01 ) sec^-1 + +*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693101020910778E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 112.1583s + [COUNTERS] Fortran Overhead ( 0 ) : 51.1368s + [COUNTERS] CudaCpp MEs ( 2 ) : 61.0216s for 8192 events => throughput is 1.34E+02 events/s + +*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1693101020910778E-006) differ by less than 2E-4 (6.454658807442115e-09) + +*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! 
Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436284111598E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 719.1467s + [COUNTERS] Fortran Overhead ( 0 ) : 54.6964s + [COUNTERS] CudaCpp MEs ( 2 ) : 664.4503s for 90112 events => throughput is 1.36E+02 events/s + +*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436284111598E-007) differ by less than 2E-4 (5.866422903011426e-09) + +*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.625730e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.622146e+02 ) sec^-1 + +*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 48.7268s + [COUNTERS] Fortran Overhead ( 0 ) : 22.2016s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.5252s for 8192 events => throughput is 3.09E+02 events/s + +*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (1.1693100945435802E-006) and cpp (1.1693101021831071E-006) differ by less than 2E-4 (6.5333627397023974e-09) + +*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 312.7787s + [COUNTERS] Fortran Overhead ( 0 ) : 25.8939s + [COUNTERS] CudaCpp MEs ( 2 ) : 286.8848s for 90112 events => throughput is 3.14E+02 events/s + +*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007) differ by less than 2E-4 (5.742375686068613e-09) + +*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.761983e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.775859e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 42.1739s + [COUNTERS] Fortran Overhead ( 0 ) : 19.2356s + [COUNTERS] CudaCpp MEs ( 2 ) : 22.9383s for 8192 events => throughput is 3.57E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1693101021831071E-006) differ by less than 2E-4 (6.5333627397023974e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 277.3137s + [COUNTERS] Fortran Overhead ( 0 ) : 23.0478s + [COUNTERS] CudaCpp MEs ( 2 ) : 254.2659s for 90112 events => throughput is 3.54E+02 events/s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007) differ by less than 2E-4 (5.742375686068613e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.346725e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.360141e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! 
Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693101021831071E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 45.3760s + [COUNTERS] Fortran Overhead ( 0 ) : 21.9554s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.4206s for 8192 events => throughput is 3.50E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1693101021831071E-006) differ by less than 2E-4 (6.5333627397023974e-09) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436281462142E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 283.3743s + [COUNTERS] Fortran Overhead ( 0 ) : 25.7277s + [COUNTERS] CudaCpp MEs ( 2 ) : 257.6465s for 90112 events => throughput is 3.50E+02 events/s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436281462142E-007) differ by less than 2E-4 (5.742375686068613e-09) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.787133e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.796022e+02 ) sec^-1 + +*** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 1.169e-06 [1.1693100942770687E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 15 events (found 163 events) + [COUNTERS] PROGRAM TOTAL : 3.5891s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7218s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8672s for 8192 events => throughput is 9.45E+03 events/s + +*** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (1.1693100945435802E-006) and cpp (1.1693100942770687E-006) differ by less than 2E-4 (2.2792201459509442e-10) + +*** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! Channel number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.136e-07 [2.1358436157495368E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 84 events (found 808 events) + [COUNTERS] PROGRAM TOTAL : 15.8181s + [COUNTERS] Fortran Overhead ( 0 ) : 6.3338s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4843s for 90112 events => throughput is 9.50E+03 events/s + +*** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.1358436158813976E-007) and cpp (2.1358436157495368E-007) differ by less than 2E-4 (6.173705990875078e-11) + +*** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.489325e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.086868e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.112402e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.163573e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.112546e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.110187e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.113455e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.651684e+03 ) sec^-1 + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 2a2ae334de..17d6db3749 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -3,9 +3,9 @@ 
CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_20:08:20 +DATE: 2023-11-09_18:42:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3085s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2380s - [COUNTERS] Fortran MEs ( 1 ) : 0.0705s for 8192 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2361s + [COUNTERS] Fortran MEs ( 1 ) : 0.0704s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3042s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2333s - [COUNTERS] Fortran MEs ( 1 ) : 0.0708s for 8192 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2994s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2293s + [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2114s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4363s - 
[COUNTERS] Fortran MEs ( 1 ) : 0.7751s for 90112 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.1760s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4125s + [COUNTERS] Fortran MEs ( 1 ) : 0.7635s for 90112 events => throughput is 1.18E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3922s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3158s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3841s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3081s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0760s for 8192 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3858s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8420s for 90112 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3472s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5183s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8289s for 90112 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.080426e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089572e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.086485e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.081996e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3230s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3165s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2761s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615872] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9553s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4982s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4571s for 90112 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9307s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.4770s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4537s for 90112 events => throughput is 1.99E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.984398e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.997353e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.942353e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027039e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2839s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2604s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2805s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2572s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 8192 events => throughput is 3.52E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7384s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4779s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2605s for 90112 events => throughput is 3.46E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7189s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4610s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2579s for 90112 events => throughput is 3.49E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.360936e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.495576e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.508331e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.465419e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2789s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2578s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2772s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2561s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.89E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7099s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4763s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2336s for 90112 events => throughput is 3.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6775s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4482s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2293s for 90112 events => throughput is 3.93E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.911581e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.760921e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.775740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.978083e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3043s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2722s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2977s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2665s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0311s for 8192 events => throughput is 2.63E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615863] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8369s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4881s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3488s for 90112 events => throughput is 2.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8099s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4686s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3413s for 90112 events => throughput is 2.64E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.489296e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.568787e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.512748e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.561174e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703733] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6694s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6687s + [COUNTERS] PROGRAM TOTAL : 0.6636s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6629s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615869] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9131s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9051s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 90112 events => throughput is 1.13E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8698s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8622s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.18E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.578046e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.555687e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.918680e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.006338e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.385541e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.515172e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.515910e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.526258e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.366310e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.533570e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.781318e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.783496e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.383694e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.532375e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.778819e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.774257e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 76ba714558..a15824491a 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ CUDACPP_BUILDDIR='.' 
make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,13 +15,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_20:08:49 +DATE: 2023-11-09_18:42:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3082s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2374s - [COUNTERS] Fortran MEs ( 1 ) : 0.0709s for 8192 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3036s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2341s + [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3089s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2376s - [COUNTERS] Fortran MEs ( 1 ) : 0.0713s for 8192 events => throughput is 1.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3003s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2303s + [COUNTERS] Fortran MEs ( 1 ) : 0.0699s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2176s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4416s - [COUNTERS] Fortran MEs ( 1 ) : 0.7760s for 90112 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2069s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4340s + [COUNTERS] Fortran MEs ( 1 
) : 0.7729s for 90112 events => throughput is 1.17E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050316058770007] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3831s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3106s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0725s for 8192 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3749s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3033s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182797520666] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.3282s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5337s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7945s for 90112 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2764s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4961s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7803s for 90112 events => throughput is 1.15E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.150985e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.160144e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152855e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.172915e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313133963987] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2893s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2630s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0263s for 8192 events => throughput is 3.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2592s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0253s for 8192 events => throughput is 3.23E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179276862181] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7627s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4776s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2851s for 90112 events => throughput is 3.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7355s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4565s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2790s for 90112 events => throughput is 3.23E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN 
xsec *** @@ -258,12 +258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.058447e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.194415e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.117460e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.097783e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2617s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2490s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0127s for 8192 events => throughput is 6.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2583s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2455s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 8192 events => throughput is 6.41E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4861s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1403s for 90112 events => throughput is 6.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5855s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4486s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1369s for 90112 events => throughput is 6.58E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.320941e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.397086e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.304004e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.385448e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050313344346482] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2672s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2557s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0115s for 8192 events => throughput is 7.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2587s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2472s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0114s for 8192 events => throughput is 7.16E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801179137376883] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6213s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4898s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1315s for 90112 events => throughput is 6.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5778s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1272s for 90112 events => throughput is 7.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.800881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.864944e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.852775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.826763e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050317064561834] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2707s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2550s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2685s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.17E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182143140752] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.6597s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4813s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1784s for 90112 events => throughput is 5.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6231s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4511s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1720s for 90112 events => throughput is 5.24E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.682841e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.932364e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.814031e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.764394e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050319131407651] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6668s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6663s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.60E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6586s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6581s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.57E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801186038252196] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9031s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8970s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 90112 events => throughput is 1.46E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9395s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9332s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.810157e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.830948e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.442986e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.471030e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.776377e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.130497e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.714442e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.724199e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.784654e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.113825e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.791545e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.756435e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.353442e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.594258e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.984091e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.959495e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index d9f19e3972..3468beddc5 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -3,8 +3,8 @@ CUDACPP_BUILDDIR='.' 
- make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y @@ -15,15 +15,15 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -33,7 +33,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2023-11-03_20:09:17 +DATE: 2023-11-09_18:42:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 78 events (found 561 events) - [COUNTERS] PROGRAM TOTAL : 0.3076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2365s - [COUNTERS] Fortran MEs ( 1 ) : 0.0711s for 8192 events => throughput is 1.15E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3047s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2346s + [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333309703716] fbridge_mode=0 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3048s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2341s - [COUNTERS] Fortran MEs ( 1 ) : 0.0707s for 8192 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2359s + [COUNTERS] Fortran MEs ( 1 ) : 0.0705s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182648615874] fbridge_mode=0 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.2173s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4390s - [COUNTERS] Fortran MEs ( 1 ) : 0.7783s for 90112 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2175s + 
[COUNTERS] Fortran Overhead ( 0 ) : 1.4409s + [COUNTERS] Fortran MEs ( 1 ) : 0.7766s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657206] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3915s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3150s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0766s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3854s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3097s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0757s for 8192 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608796] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 2.4080s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5555s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8525s for 90112 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3546s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5224s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8322s for 90112 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -182,12 +182,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.026153e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.083780e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.029864e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.087409e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,8 +210,8 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333282657201] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3183s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2784s + [COUNTERS] PROGRAM TOTAL : 0.3153s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2754s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0399s for 8192 events => throughput is 2.05E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182636608810] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.9529s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5041s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4488s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9817s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5338s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4479s for 90112 events => throughput is 2.01E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -258,12 
+258,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.013366e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.021169e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.020889e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.048865e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2636s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0235s for 8192 events => throughput is 3.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2833s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2602s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 8192 events => throughput is 3.54E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7508s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4909s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2599s for 90112 events => throughput is 3.47E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7176s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4613s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2564s for 90112 events => throughput is 3.52E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -334,12 +334,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.380135e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.495609e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.471740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519650e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.2817s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2612s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0206s for 8192 events => throughput is 3.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.2747s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2542s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0205s for 8192 events => throughput is 3.99E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.7107s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4801s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2306s for 90112 events => throughput is 3.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6910s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4668s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2243s for 90112 events => throughput is 4.02E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -410,12 +410,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.890792e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.857183e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.973788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.991341e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333291481387] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.3050s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2711s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 8192 events => throughput is 2.42E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3063s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2736s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0327s for 8192 events => throughput is 2.50E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182638680733] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8573s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4973s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3600s for 90112 events => throughput is 2.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8379s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3586s for 90112 events => throughput is 2.51E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -486,12 +486,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.438047e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.546786e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.395865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.503592e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2605 [0.26050333301029693] fbridge_mode=1 [UNWEIGHT] Wrote 81 events (found 540 events) - [COUNTERS] PROGRAM TOTAL : 0.6671s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6664s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.19E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.6613s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6607s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.22E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.218 [0.21801182637219935] fbridge_mode=1 [UNWEIGHT] Wrote 853 events (found 1849 events) - [COUNTERS] PROGRAM TOTAL : 1.8923s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8845s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 90112 events => throughput is 1.16E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8739s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -562,41 +562,41 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.584492e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.582711e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.972938e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.041620e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.377134e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.534455e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.496287e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.524256e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.388325e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.513154e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.763560e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.797491e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.382255e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.528865e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.773123e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.779970e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 4e0cc4f360..96be4f25ce 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:00:16 +DATE: 2023-11-09_17:36:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.995135e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.942022e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.073010e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.632744e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.846433e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.013402e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.649523 sec - 2,606,897,569 cycles # 2.955 GHz - 4,039,165,920 instructions # 1.55 insn per cycle - 0.938736477 seconds time elapsed +TOTAL : 0.666402 sec + 2,677,197,972 cycles # 3.012 GHz + 4,052,373,824 instructions # 1.51 insn per cycle + 0.957128261 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.116390e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.309346e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.309346e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.129159e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.324668e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.324668e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.039128 sec - 18,293,625,810 cycles # 3.027 GHz - 44,037,997,118 instructions # 2.41 insn per cycle - 6.044375342 seconds time elapsed +TOTAL : 5.970581 sec + 18,294,560,469 cycles # 3.063 GHz + 44,035,841,714 instructions # 2.41 insn per cycle + 5.975709847 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.650519e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.159299e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.159299e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.674808e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.201099e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.201099e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.212186 sec - 12,761,177,625 cycles # 3.027 GHz - 31,004,602,670 instructions # 2.43 insn per cycle - 4.217391637 seconds time elapsed +TOTAL : 4.151985 sec + 12,801,375,184 cycles # 3.080 GHz + 31,001,968,290 instructions # 2.42 insn per cycle + 4.157180427 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.065360e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.886676e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.886676e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.097286e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.929276e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.929276e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.440327 sec - 10,045,086,881 cycles # 2.916 GHz - 19,380,193,658 instructions # 1.93 insn per cycle - 3.445672409 seconds time elapsed +TOTAL : 3.388202 sec + 10,019,877,774 cycles # 2.954 GHz + 19,377,611,613 instructions # 1.93 insn per cycle + 3.393320382 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.092180e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.955480e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.955480e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.171888e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.054473e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.054473e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.409304 sec - 9,718,965,428 cycles # 2.848 GHz - 18,998,332,681 instructions # 1.95 insn per cycle - 3.414677998 seconds time elapsed +TOTAL : 3.283560 sec + 9,692,698,438 cycles # 2.948 GHz + 19,006,248,514 instructions # 1.96 insn per cycle + 3.288694745 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.821062e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.417007e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.417007e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.836531e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.447502e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.447502e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.852694 sec - 8,598,148,642 cycles # 2.229 GHz - 15,740,848,417 instructions # 1.83 insn per cycle - 3.858015954 seconds time elapsed +TOTAL : 3.828285 sec + 8,619,412,035 cycles # 2.250 GHz + 15,739,302,747 instructions # 1.83 insn per cycle + 3.833534805 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index a2a2220e0b..46e9abca4a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:34:09 +DATE: 2023-11-09_18:08:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.616160e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.542311e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.542311e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.786999e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.766835e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.766835e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.257075 sec - 7,500,299,564 cycles # 3.000 GHz - 13,128,281,558 instructions # 1.75 insn per cycle - 2.557069801 seconds time elapsed +TOTAL : 2.197852 sec + 7,407,513,320 cycles # 3.040 GHz + 13,213,549,787 instructions # 1.78 insn per cycle + 2.495471586 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -86,14 +86,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.074156e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.251964e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.251964e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.082808e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.262532e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.262532e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.457469 sec - 19,613,725,947 cycles # 3.035 GHz - 44,260,538,354 instructions # 2.26 insn per cycle - 6.464068851 seconds time elapsed +TOTAL : 6.417727 sec + 19,594,664,001 cycles # 3.052 GHz + 44,265,878,138 instructions # 2.26 insn per cycle + 6.424119903 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -114,14 +114,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.537992e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.980628e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.980628e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.589377e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.044221e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.044221e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.703362 sec - 14,014,545,412 cycles # 2.976 GHz - 31,843,317,256 instructions # 2.27 insn per cycle - 4.710044451 seconds time elapsed +TOTAL : 4.559857 sec + 14,005,526,343 cycles # 3.068 GHz + 31,844,006,198 instructions # 2.27 insn per cycle + 4.566322148 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -142,14 +142,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.930954e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.630364e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.630364e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.929770e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.628189e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.628189e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.870178 sec - 11,351,058,249 cycles # 2.929 GHz - 20,737,271,008 instructions # 1.83 insn per cycle - 3.876822605 seconds time elapsed +TOTAL : 3.878054 sec + 11,287,723,645 cycles # 2.906 GHz + 20,738,072,181 instructions # 1.84 insn per cycle + 3.884538371 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -170,14 +170,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.936889e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.651989e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.651989e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.014169e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.779352e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.779352e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.871998 sec - 11,000,759,855 cycles # 2.837 GHz - 20,365,657,381 instructions # 1.85 insn per cycle - 3.879015734 seconds time elapsed +TOTAL : 3.727856 sec + 11,041,223,612 cycles # 2.958 GHz + 20,355,670,345 instructions # 1.84 insn per cycle + 3.734291913 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -198,14 +198,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.694377e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.207135e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.207135e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.744355e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.276403e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.276403e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.335020 sec - 9,935,731,633 cycles # 2.289 GHz - 16,882,918,411 instructions # 1.70 insn per cycle - 4.341683669 seconds time elapsed +TOTAL : 4.223001 sec + 9,961,082,180 cycles # 2.356 GHz + 16,884,642,255 instructions # 1.70 insn per cycle + 4.229415228 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index dedce3e2ef..06dd49c8ef 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:47:12 +DATE: 2023-11-09_18:21:50 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.493472e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.526211e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.980085e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.833760e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.622748e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.982780e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.335531 sec - 4,653,241,552 cycles # 2.971 GHz - 7,232,975,239 instructions # 1.55 insn per cycle - 1.623039981 seconds time elapsed +TOTAL : 1.311946 sec + 4,695,073,853 cycles # 3.035 GHz + 7,228,449,301 instructions # 1.54 insn per cycle + 1.606166442 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.100587e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.292616e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.292616e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.133856e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.330921e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.330921e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.487751 sec - 19,390,492,430 cycles # 2.987 GHz - 44,137,957,280 instructions # 2.28 insn per cycle - 6.493082825 seconds time elapsed +TOTAL : 6.295519 sec + 19,403,964,054 cycles # 3.081 GHz + 44,141,070,523 instructions # 2.27 insn per cycle + 6.300790833 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.649039e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.157189e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.157189e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.674176e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.191162e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.191162e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.573606 sec - 13,864,290,699 cycles # 3.029 GHz - 31,004,021,041 instructions # 2.24 insn per cycle - 4.579072706 seconds time elapsed +TOTAL : 4.504649 sec + 13,863,184,367 cycles # 3.075 GHz + 31,003,513,865 instructions # 2.24 insn per cycle + 4.509943224 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.050077e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.865714e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.865714e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.015608e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.805515e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.805515e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.825144 sec - 11,151,950,602 cycles # 2.912 GHz - 19,279,192,444 instructions # 1.73 insn per cycle - 3.830421553 seconds time elapsed +TOTAL : 3.880062 sec + 11,162,114,716 cycles # 2.882 GHz + 19,285,048,189 instructions # 1.73 insn per cycle + 3.885435669 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.125943e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.996151e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.996151e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.146900e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.045970e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.045970e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.721741 sec - 10,820,749,101 cycles # 2.904 GHz - 18,706,645,976 instructions # 1.73 insn per cycle - 3.727088912 seconds time elapsed +TOTAL : 3.683003 sec + 10,893,551,236 cycles # 2.955 GHz + 18,696,669,062 instructions # 1.72 insn per cycle + 3.688290519 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.802766e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.399092e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.399092e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.858668e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.475829e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.475829e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.260983 sec - 9,758,383,682 cycles # 2.288 GHz - 15,439,422,037 instructions # 1.58 insn per cycle - 4.266311634 seconds time elapsed +TOTAL : 4.138576 sec + 9,729,969,286 cycles # 2.349 GHz + 15,438,316,077 instructions # 1.59 insn per cycle + 4.143776269 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt index 753c8feb62..148fb0d2ee 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:43:56 +DATE: 2023-11-09_18:18:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.492551e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.537742e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.994776e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.853961e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.658990e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.049126e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.978991 sec - 3,581,699,122 cycles # 2.964 GHz - 7,061,755,742 instructions # 1.97 insn per cycle - 1.265379690 seconds time elapsed +TOTAL : 0.956476 sec + 3,586,792,512 cycles # 3.034 GHz + 7,163,432,319 instructions # 2.00 insn per cycle + 1.241060065 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.108457e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.301315e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.301315e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.134189e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.330626e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.330626e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.081290 sec - 18,339,334,415 cycles # 3.014 GHz - 44,033,842,254 instructions # 2.40 insn per cycle - 6.086519540 seconds time elapsed +TOTAL : 5.945995 sec + 18,306,649,766 cycles # 3.077 GHz + 44,036,304,039 instructions # 2.41 insn per cycle + 5.951221281 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.647910e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.158230e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.158230e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.656363e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.166761e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.166761e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.219825 sec - 12,790,482,904 cycles # 3.028 GHz - 31,000,190,511 instructions # 2.42 insn per cycle - 4.225042583 seconds time elapsed +TOTAL : 4.200416 sec + 12,751,192,820 cycles # 3.033 GHz + 31,001,487,666 instructions # 2.43 insn per cycle + 4.205764852 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.046562e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.846964e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.846964e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.102659e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.940126e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.940126e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.470466 sec - 10,075,062,185 cycles # 2.899 GHz - 19,376,808,574 instructions # 1.92 insn per cycle - 3.475725491 seconds time elapsed +TOTAL : 3.381762 sec + 10,061,410,412 cycles # 2.972 GHz + 19,378,394,064 instructions # 1.93 insn per cycle + 3.387061232 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.091991e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.948349e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.948349e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.165893e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.060121e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.060121e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.411832 sec - 9,706,821,336 cycles # 2.841 GHz - 18,993,945,887 instructions # 1.96 insn per cycle - 3.417093831 seconds time elapsed +TOTAL : 3.294663 sec + 9,710,957,285 cycles # 2.944 GHz + 18,994,988,980 instructions # 1.96 insn per cycle + 3.300038627 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.817313e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.417390e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.417390e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.865019e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.483923e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.483923e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.864825 sec - 8,629,354,000 cycles # 2.231 GHz - 15,737,585,107 instructions # 1.82 insn per cycle - 3.870285071 seconds time elapsed +TOTAL : 3.767379 sec + 8,603,525,039 cycles # 2.281 GHz + 15,737,455,232 instructions # 1.83 insn per cycle + 3.772597879 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 8472c31bea..d2d2949097 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:40:37 +DATE: 2023-11-09_18:15:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.065913e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.488032e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.905997e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.240881e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.587683e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.915014e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.876732 sec - 6,299,612,348 cycles # 2.989 GHz - 11,571,253,190 instructions # 1.84 insn per cycle - 2.164294467 seconds time elapsed +TOTAL : 1.834661 sec + 6,293,478,609 cycles # 3.041 GHz + 11,504,742,224 instructions # 1.83 insn per cycle + 2.125902004 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,14 +79,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.111600e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.304742e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.304742e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.133681e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.328323e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.328323e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.064598 sec - 18,297,128,822 cycles # 3.015 GHz - 44,033,779,580 instructions # 2.41 insn per cycle - 6.069938342 seconds time elapsed +TOTAL : 5.944399 sec + 18,276,841,424 cycles # 3.072 GHz + 44,034,753,944 instructions # 2.41 insn per cycle + 5.949724506 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 433) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest.exe @@ -106,14 +106,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.622403e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.120612e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.120612e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.688289e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.207763e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.207763e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.285433 sec - 12,790,120,071 cycles # 2.982 GHz - 31,000,688,554 instructions # 2.42 insn per cycle - 4.290779048 seconds time elapsed +TOTAL : 4.121368 sec + 12,748,827,025 cycles # 3.090 GHz + 31,001,833,202 instructions # 2.43 insn per cycle + 4.126844954 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest.exe @@ -133,14 +133,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.044295e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.854365e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.854365e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.079781e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.896967e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.896967e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.476131 sec - 10,066,944,453 cycles # 2.893 GHz - 19,377,002,166 instructions # 1.92 insn per cycle - 3.481530813 seconds time elapsed +TOTAL : 3.417656 sec + 10,039,679,603 cycles # 2.934 GHz + 19,377,458,106 instructions # 1.93 insn per cycle + 3.423002014 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1965) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.095206e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.953285e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.953285e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.191213e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.094392e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.094392e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.401536 sec - 9,758,102,764 cycles # 2.865 GHz - 18,996,151,120 instructions # 1.95 insn per cycle - 3.406936941 seconds time elapsed +TOTAL : 3.256603 sec + 9,688,244,134 cycles # 2.971 GHz + 19,005,599,231 instructions # 1.96 insn per cycle + 3.261875957 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1689) (512y: 181) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest.exe @@ -187,14 +187,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.814025e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.410019e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.410019e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.880720e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.508965e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.508965e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.870433 sec - 8,615,604,376 cycles # 2.224 GHz - 15,736,922,136 instructions # 1.83 insn per cycle - 3.875834680 seconds time elapsed +TOTAL : 3.737405 sec + 8,601,041,918 cycles # 2.299 GHz + 15,737,525,138 instructions # 1.83 insn per cycle + 3.742726567 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 900) (512y: 154) (512z: 1258) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index b542059ad1..2943a1e3d5 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:00:50 +DATE: 2023-11-09_17:37:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.000398e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.960570e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.110004e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.636703e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.863019e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.046703e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.647549 sec - 2,611,748,045 cycles # 2.979 GHz - 4,046,502,501 instructions # 1.55 insn per cycle - 0.933750268 seconds time elapsed +TOTAL : 0.654694 sec + 2,666,558,745 cycles # 3.022 GHz + 4,096,338,325 instructions # 1.54 insn per cycle + 0.944612967 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.159227e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.372064e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.372064e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.202919e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.424199e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.424199e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.833108 sec - 17,445,226,847 cycles # 2.989 GHz - 41,885,202,351 instructions # 2.40 insn per cycle - 5.838346819 seconds time elapsed +TOTAL : 5.624164 sec + 17,409,154,909 cycles # 3.093 GHz + 41,881,099,052 instructions # 2.41 insn per cycle + 5.629252249 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.682893e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.222491e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.222491e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.734385e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.287483e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.287483e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.142121 sec - 12,470,632,862 cycles # 3.008 GHz - 30,166,171,065 instructions # 2.42 insn per cycle - 4.147564686 seconds time elapsed +TOTAL : 4.020839 sec + 12,439,753,645 cycles # 3.090 GHz + 30,163,334,779 instructions # 2.42 insn per cycle + 4.026082449 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.069225e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.895121e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.895121e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.071596e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.904428e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.904428e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.437470 sec - 9,952,077,094 cycles # 2.891 GHz - 19,112,450,451 instructions # 1.92 insn per cycle - 3.442739539 seconds time elapsed +TOTAL : 3.432943 sec + 9,954,541,311 cycles # 2.896 GHz + 19,109,473,980 instructions # 1.92 insn per cycle + 3.438069931 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1930) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.130212e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.018241e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.018241e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.172502e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.071351e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.071351e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.352335 sec - 9,644,260,853 cycles # 2.874 GHz - 18,779,667,176 instructions # 1.95 insn per cycle - 3.357742942 seconds time elapsed +TOTAL : 3.287111 sec + 9,635,946,931 cycles # 2.927 GHz + 18,764,577,329 instructions # 1.95 insn per cycle + 3.292294749 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1661) (512y: 178) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.865497e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.495990e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.495990e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.921117e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.582437e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.582437e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.772482 sec - 8,452,356,069 cycles # 2.238 GHz - 15,617,271,494 instructions # 1.85 insn per cycle - 3.777813091 seconds time elapsed +TOTAL : 3.666524 sec + 8,448,044,488 cycles # 2.302 GHz + 15,613,692,408 instructions # 1.85 insn per cycle + 3.671704856 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 886) (512y: 156) (512z: 1239) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 9fba89aff3..e7918e9c23 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:23:25 +DATE: 2023-11-09_17:58:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.483432e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.567049e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.058193e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.801176e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.647831e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.027831e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.676370 sec - 2,703,741,341 cycles # 2.971 GHz - 4,197,515,180 instructions # 1.55 insn per cycle - 0.967825669 seconds time elapsed +TOTAL : 0.681809 sec + 2,713,657,783 cycles # 2.966 GHz + 4,201,847,315 instructions # 1.55 insn per cycle + 0.974362645 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.672486e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.141310e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.141310e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.699910e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.178375e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.178375e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.163173 sec - 12,692,329,334 cycles # 3.045 GHz - 32,576,040,648 instructions # 2.57 insn per cycle - 4.168672183 seconds time elapsed +TOTAL : 4.094284 sec + 12,664,884,276 cycles # 3.090 GHz + 32,577,115,805 instructions # 2.57 insn per cycle + 4.099557701 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 296) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.116856e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.025219e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.025219e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.143219e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.065278e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.065278e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.372207 sec - 10,267,724,267 cycles # 3.041 GHz - 24,505,197,015 instructions # 2.39 insn per cycle - 3.377809241 seconds time elapsed +TOTAL : 3.331773 sec + 10,271,423,521 cycles # 3.079 GHz + 24,506,625,447 instructions # 2.39 insn per cycle + 3.337328311 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.304978e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.380785e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.380785e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.319805e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.394403e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.394403e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.125688 sec - 9,128,103,141 cycles # 2.916 GHz - 16,940,836,203 instructions # 1.86 insn per cycle - 3.131242434 seconds time elapsed +TOTAL : 3.108988 sec + 9,122,185,757 cycles # 2.931 GHz + 16,942,074,182 instructions # 1.86 insn per cycle + 3.114300266 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1631) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.298021e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.382509e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.382509e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.263608e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.556489e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.556489e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.144282 sec - 8,899,696,508 cycles # 2.834 GHz - 16,372,313,838 instructions # 1.84 insn per cycle - 3.149838418 seconds time elapsed +TOTAL : 3.169374 sec + 9,426,858,565 cycles # 2.970 GHz + 16,370,203,044 instructions # 1.74 insn per cycle + 3.174743316 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1370) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.053092e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.845549e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.845549e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.105750e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.926413e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.926413e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.465226 sec - 7,910,184,141 cycles # 2.280 GHz - 14,591,740,895 instructions # 1.84 insn per cycle - 3.470686114 seconds time elapsed +TOTAL : 3.377253 sec + 7,897,254,276 cycles # 2.335 GHz + 14,592,693,571 instructions # 1.85 insn per cycle + 3.382567542 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1015) (512y: 158) (512z: 955) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 9b85799057..676eafadb1 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:23:55 +DATE: 2023-11-09_17:58:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.480686e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.569964e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.063993e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.818208e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.668713e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.053456e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.677772 sec - 2,691,282,086 cycles # 2.960 GHz - 4,219,338,579 instructions # 1.57 insn per cycle - 0.971577356 seconds time elapsed +TOTAL : 0.673433 sec + 2,679,233,058 cycles # 2.963 GHz + 4,187,218,910 instructions # 1.56 insn per cycle + 0.965878825 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.182406e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.087943e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.087943e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.244543e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.167572e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.167572e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.286151 sec - 9,910,806,255 cycles # 3.012 GHz - 25,456,031,111 instructions # 2.57 insn per cycle - 3.291763573 seconds time elapsed +TOTAL : 3.200260 sec + 9,840,700,159 cycles # 3.071 GHz + 25,456,933,061 instructions # 2.59 insn per cycle + 3.205821754 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 249) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.467752e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.800434e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.800434e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.515705e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.876135e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.876135e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.950518 sec - 8,946,482,743 cycles # 3.027 GHz - 21,514,123,834 instructions # 2.40 insn per cycle - 2.956056552 seconds time elapsed +TOTAL : 2.896836 sec + 8,925,793,988 cycles # 3.076 GHz + 21,514,573,078 instructions # 2.41 insn per cycle + 2.902177430 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1119) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.464134e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.723435e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.723435e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.506104e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.783990e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783990e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.952533 sec - 8,633,003,733 cycles # 2.920 GHz - 15,829,431,121 instructions # 1.83 insn per cycle - 2.958100358 seconds time elapsed +TOTAL : 2.900756 sec + 8,606,887,419 cycles # 2.962 GHz + 15,829,788,154 instructions # 1.84 insn per cycle + 2.906279310 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1494) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.533505e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.859681e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.859681e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.541955e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.879613e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.879613e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.876122 sec - 8,428,640,196 cycles # 2.926 GHz - 15,527,735,744 instructions # 1.84 insn per cycle - 2.881608685 seconds time elapsed +TOTAL : 2.869140 sec + 8,396,471,591 cycles # 2.922 GHz + 15,529,030,850 instructions # 1.85 insn per cycle + 2.874505432 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1268) (512y: 139) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.128966e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.008830e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.008830e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.119247e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.990719e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.990719e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.361119 sec - 7,560,312,259 cycles # 2.246 GHz - 14,293,668,051 instructions # 1.89 insn per cycle - 3.366622669 seconds time elapsed +TOTAL : 3.376497 sec + 7,569,554,118 cycles # 2.239 GHz + 14,295,014,243 instructions # 1.89 insn per cycle + 3.381953719 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1041) (512y: 164) (512z: 874) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 46e803358f..b0b6c7dbbf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:01:23 +DATE: 2023-11-09_17:37:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.626199e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.328475e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.281681e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.535063e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.287307e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.259593e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.560646 sec - 2,313,886,918 cycles # 2.957 GHz - 3,567,705,327 instructions # 1.54 insn per cycle - 0.840116151 seconds time elapsed +TOTAL : 0.562225 sec + 2,332,457,444 cycles # 2.979 GHz + 3,625,755,159 instructions # 1.55 insn per cycle + 0.842176648 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.146010e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.358105e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.358105e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.164715e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.380430e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.380430e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.851033 sec - 17,813,996,987 cycles # 3.043 GHz - 43,616,814,202 instructions # 2.45 insn per cycle - 5.856069183 seconds time elapsed +TOTAL : 5.760009 sec + 17,802,097,031 cycles # 3.089 GHz + 43,613,527,077 instructions # 2.45 insn per cycle + 5.764750077 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.343466e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.599751e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.599751e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.392272e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.663586e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.663586e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.048613 sec - 9,276,606,540 cycles # 3.040 GHz - 21,930,294,042 instructions # 2.36 insn per cycle - 3.053688884 seconds time elapsed +TOTAL : 2.985891 sec + 9,233,559,019 cycles # 3.088 GHz + 21,925,837,880 instructions # 2.37 insn per cycle + 2.990875616 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.523694e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.872956e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.872956e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.561578e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.939602e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.939602e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.845518 sec - 8,308,772,789 cycles # 2.916 GHz - 15,593,301,532 instructions # 1.88 insn per cycle - 2.850623438 seconds time elapsed +TOTAL : 2.807792 sec + 8,302,482,665 cycles # 2.952 GHz + 15,590,734,796 instructions # 1.88 insn per cycle + 2.812825281 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.489948e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.840461e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.840461e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.577370e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.998184e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.998184e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.887357 sec - 8,231,785,355 cycles # 2.847 GHz - 15,437,944,905 instructions # 1.88 insn per cycle - 2.892363682 seconds time elapsed +TOTAL : 2.791624 sec + 8,243,582,435 cycles # 2.950 GHz + 15,435,159,534 instructions # 1.87 insn per cycle + 2.796691298 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.580760e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.973673e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.973673e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.534202e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.878199e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.878199e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.796324 sec - 6,629,287,981 cycles # 2.367 GHz - 12,873,018,117 instructions # 1.94 insn per cycle - 2.801456274 seconds time elapsed +TOTAL : 2.844579 sec + 6,638,595,923 cycles # 2.339 GHz + 12,873,058,969 instructions # 1.94 insn per cycle + 2.849721551 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index a12ca3b41d..198199e430 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:34:47 +DATE: 2023-11-09_18:09:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.243102e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.475352e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.475352e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.497702e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.965150e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.965150e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.676327 sec - 5,681,132,328 cycles # 2.981 GHz - 10,328,752,116 instructions # 1.82 insn per cycle - 1.962251346 seconds time elapsed +TOTAL : 1.636588 sec + 5,687,776,927 cycles # 3.043 GHz + 10,344,643,155 instructions # 1.82 insn per cycle + 1.926222709 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -86,14 +86,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.117341e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.320071e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.320071e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.124698e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.329265e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329265e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.103747 sec - 18,503,457,384 cycles # 3.029 GHz - 43,763,268,873 instructions # 2.37 insn per cycle - 6.109986471 seconds time elapsed +TOTAL : 6.061752 sec + 18,474,797,660 cycles # 3.045 GHz + 43,763,223,756 instructions # 2.37 insn per cycle + 6.067744277 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -114,14 +114,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.169781e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.246790e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.246790e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.280805e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.418662e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.418662e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.406148 sec - 10,026,239,155 cycles # 2.945 GHz - 23,264,915,776 instructions # 2.32 insn per cycle - 3.412744895 seconds time elapsed +TOTAL : 3.239576 sec + 10,001,339,639 cycles # 3.083 GHz + 23,260,791,069 instructions # 2.33 insn per cycle + 3.245668541 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -142,14 +142,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.376931e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.582524e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.582524e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.455472e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.697664e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697664e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.133404 sec - 9,115,108,969 cycles # 2.904 GHz - 16,712,850,458 instructions # 1.83 insn per cycle - 3.139765331 seconds time elapsed +TOTAL : 3.034907 sec + 9,092,859,245 cycles # 2.991 GHz + 16,710,213,462 instructions # 1.84 insn per cycle + 3.041109346 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -170,14 +170,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.412136e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.649634e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.649634e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.469626e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.746671e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.746671e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.093398 sec - 9,015,171,302 cycles # 2.909 GHz - 16,559,247,945 instructions # 1.84 insn per cycle - 3.099791137 seconds time elapsed +TOTAL : 3.026366 sec + 9,019,828,246 cycles # 2.976 GHz + 16,555,168,621 instructions # 1.84 insn per cycle + 3.032449491 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -198,14 +198,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.406219e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.608241e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.608241e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.460766e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.686068e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.686068e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.106272 sec - 7,475,444,541 cycles # 2.404 GHz - 14,076,958,110 instructions # 1.88 insn per cycle - 3.112522018 seconds time elapsed +TOTAL : 3.037604 sec + 7,413,210,247 cycles # 2.436 GHz + 14,077,138,025 instructions # 1.90 insn per cycle + 3.043934055 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index e12a7cff38..38db2540d0 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:47:49 +DATE: 2023-11-09_18:22:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.309547e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.164321e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.211559e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.382431e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.208254e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.230961e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.178788 sec - 4,175,363,575 cycles # 2.986 GHz - 6,687,157,832 instructions # 1.60 insn per cycle - 1.455561692 seconds time elapsed +TOTAL : 1.150438 sec + 4,093,367,606 cycles # 2.986 GHz + 6,655,787,532 instructions # 1.63 insn per cycle + 1.427536965 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.139229e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.352216e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.352216e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.163257e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.379748e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.379748e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.211284 sec - 18,855,190,279 cycles # 3.034 GHz - 43,795,517,542 instructions # 2.32 insn per cycle - 6.216374296 seconds time elapsed +TOTAL : 6.085036 sec + 18,810,997,513 cycles # 3.089 GHz + 43,795,620,513 instructions # 2.33 insn per cycle + 6.090075734 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.318674e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.546898e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.546898e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.379076e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.642823e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.642823e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.402195 sec - 10,237,833,782 cycles # 3.006 GHz - 22,007,212,368 instructions # 2.15 insn per cycle - 3.407333694 seconds time elapsed +TOTAL : 3.315467 sec + 10,223,065,521 cycles # 3.080 GHz + 22,006,854,632 instructions # 2.15 insn per cycle + 3.320462987 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.476676e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.816143e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.816143e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.487454e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.825644e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.825644e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.234448 sec - 9,334,268,427 cycles # 2.883 GHz - 15,503,242,414 instructions # 1.66 insn per cycle - 3.239539945 seconds time elapsed +TOTAL : 3.212098 sec + 9,324,905,009 cycles # 2.900 GHz + 15,502,708,810 instructions # 1.66 insn per cycle + 3.217273015 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.532354e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.931778e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.931778e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.573485e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.002018e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.002018e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.179353 sec - 9,298,076,707 cycles # 2.921 GHz - 15,144,691,612 instructions # 1.63 insn per cycle - 3.184641880 seconds time elapsed +TOTAL : 3.120613 sec + 9,288,549,778 cycles # 2.973 GHz + 15,149,849,415 instructions # 1.63 insn per cycle + 3.125542581 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.550309e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.928739e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.928739e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.617810e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.038860e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.038860e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.163394 sec - 7,678,426,346 cycles # 2.424 GHz - 12,579,409,911 instructions # 1.64 insn per cycle - 3.168501704 seconds time elapsed +TOTAL : 3.081671 sec + 7,641,480,002 cycles # 2.476 GHz + 12,579,693,620 instructions # 1.65 insn per cycle + 3.086750346 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index ed97b2f8ed..6fcc7aa480 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:44:30 +DATE: 2023-11-09_18:19:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.311918e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.184761e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.263047e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.390821e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.223370e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.268045e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.849658 sec - 3,163,783,620 cycles # 2.955 GHz - 6,425,624,965 instructions # 2.03 insn per cycle - 1.127772989 seconds time elapsed +TOTAL : 0.831823 sec + 3,198,187,473 cycles # 3.040 GHz + 6,464,633,768 instructions # 2.02 insn per cycle + 1.108743988 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.132012e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.344208e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.344208e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.166393e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.383502e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.383502e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.993383 sec - 18,094,070,839 cycles # 3.017 GHz - 43,613,404,695 instructions # 2.41 insn per cycle - 5.998406050 seconds time elapsed +TOTAL : 5.750668 sec + 17,811,310,529 cycles # 3.095 GHz + 43,613,299,638 instructions # 2.45 insn per cycle + 5.755604942 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.281067e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.486158e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.486158e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.317079e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.552668e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.552668e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.130477 sec - 9,257,197,715 cycles # 2.953 GHz - 21,925,291,921 instructions # 2.37 insn per cycle - 3.135663717 seconds time elapsed +TOTAL : 3.082399 sec + 9,236,711,908 cycles # 2.992 GHz + 21,926,264,881 instructions # 2.37 insn per cycle + 3.087937460 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.526300e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.881905e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.881905e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.562942e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.932578e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.932578e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.846007 sec - 8,323,404,187 cycles # 2.920 GHz - 15,589,367,643 instructions # 1.87 insn per cycle - 2.851124263 seconds time elapsed +TOTAL : 2.803489 sec + 8,311,895,996 cycles # 2.960 GHz + 15,590,591,103 instructions # 1.88 insn per cycle + 2.808434072 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.559394e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.951403e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.951403e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.582757e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.993146e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.993146e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.815665 sec - 8,248,875,592 cycles # 2.925 GHz - 15,439,478,624 instructions # 1.87 insn per cycle - 2.820889860 seconds time elapsed +TOTAL : 2.784903 sec + 8,236,233,463 cycles # 2.953 GHz + 15,439,539,485 instructions # 1.87 insn per cycle + 2.790025696 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.553964e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.948928e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.948928e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.640868e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.066609e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.066609e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.827281 sec - 6,687,814,053 cycles # 2.363 GHz - 12,869,763,437 instructions # 1.92 insn per cycle - 2.832592565 seconds time elapsed +TOTAL : 2.739279 sec + 6,618,156,482 cycles # 2.412 GHz + 12,869,303,752 instructions # 1.94 insn per cycle + 2.744541017 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index c7d745ef4d..ef7d7310ec 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:41:12 +DATE: 2023-11-09_18:15:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.077097e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.138341e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.120075e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.457534e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.184951e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.150897e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.480161 sec - 5,077,584,264 cycles # 2.967 GHz - 9,258,149,444 instructions # 1.82 insn per cycle - 1.768271684 seconds time elapsed +TOTAL : 1.431692 sec + 5,029,016,765 cycles # 3.047 GHz + 9,191,843,408 instructions # 1.83 insn per cycle + 1.708626202 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,14 +79,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.142005e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.354012e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.354012e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.163854e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.380282e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.380282e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.874856 sec - 17,835,700,462 cycles # 3.034 GHz - 43,613,540,806 instructions # 2.45 insn per cycle - 5.879931479 seconds time elapsed +TOTAL : 5.764377 sec + 17,805,909,761 cycles # 3.087 GHz + 43,613,494,568 instructions # 2.45 insn per cycle + 5.769597959 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest.exe @@ -106,14 +106,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.282759e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.491220e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.491220e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.391849e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.652855e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.652855e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.129132 sec - 9,269,728,355 cycles # 2.963 GHz - 21,928,484,188 instructions # 2.37 insn per cycle - 3.134244707 seconds time elapsed +TOTAL : 2.987897 sec + 9,257,292,453 cycles # 3.094 GHz + 21,926,827,781 instructions # 2.37 insn per cycle + 2.993012479 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1936) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest.exe @@ -133,14 +133,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.516560e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.868004e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.868004e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.568515e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.950394e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.950394e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.857533 sec - 8,336,241,805 cycles # 2.913 GHz - 15,589,958,795 instructions # 1.87 insn per cycle - 2.862709487 seconds time elapsed +TOTAL : 2.797984 sec + 8,317,461,722 cycles # 2.968 GHz + 15,591,357,650 instructions # 1.87 insn per cycle + 2.803063629 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.536616e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.924197e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.924197e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.510607e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.874545e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.874545e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.838427 sec - 8,267,692,084 cycles # 2.908 GHz - 15,438,877,256 instructions # 1.87 insn per cycle - 2.843475918 seconds time elapsed +TOTAL : 2.865428 sec + 8,258,982,824 cycles # 2.878 GHz + 15,434,974,292 instructions # 1.87 insn per cycle + 2.870509731 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2496) (512y: 9) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest.exe @@ -187,14 +187,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.539393e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.905150e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.905150e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.534626e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.883996e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.883996e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.843291 sec - 6,667,785,493 cycles # 2.342 GHz - 12,868,798,226 instructions # 1.93 insn per cycle - 2.848396098 seconds time elapsed +TOTAL : 2.847653 sec + 6,630,370,490 cycles # 2.325 GHz + 12,869,864,045 instructions # 1.94 insn per cycle + 2.852728913 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1735) (512y: 17) (512z: 1439) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 2a5177092e..acb88982d2 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:01:53 +DATE: 2023-11-09_17:38:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.628396e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.344836e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.322116e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.537187e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.294303e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.293124e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.558495 sec - 2,344,289,295 cycles # 2.966 GHz - 3,579,154,611 instructions # 1.53 insn per cycle - 0.847997464 seconds time elapsed +TOTAL : 0.560273 sec + 2,360,194,998 cycles # 3.018 GHz + 3,675,767,532 instructions # 1.56 insn per cycle + 0.839402775 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.195436e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.435503e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.435503e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.245068e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.494792e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.494792e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.634613 sec - 16,757,667,455 cycles # 2.972 GHz - 41,375,848,460 instructions # 2.47 insn per cycle - 5.639688103 seconds time elapsed +TOTAL : 5.409364 sec + 16,727,058,520 cycles # 3.090 GHz + 41,371,618,921 instructions # 2.47 insn per cycle + 5.414214747 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 375) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.409189e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.740073e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.740073e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.441577e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.817766e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.817766e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.974456 sec - 9,031,167,153 cycles # 3.032 GHz - 21,234,204,961 instructions # 2.35 insn per cycle - 2.979655809 seconds time elapsed +TOTAL : 2.932681 sec + 9,069,604,999 cycles # 3.089 GHz + 21,230,786,011 instructions # 2.34 insn per cycle + 2.937680542 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1841) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.541260e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.926631e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.926631e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.599334e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.008101e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.008101e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.832126 sec - 8,284,857,543 cycles # 2.922 GHz - 15,430,300,133 instructions # 1.86 insn per cycle - 2.837298063 seconds time elapsed +TOTAL : 2.767998 sec + 8,243,229,329 cycles # 2.973 GHz + 15,424,533,858 instructions # 1.87 insn per cycle + 2.772999466 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2536) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.592912e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.031163e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.031163e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.643252e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.114551e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.114551e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.778473 sec - 8,124,076,124 cycles # 2.921 GHz - 15,242,043,085 instructions # 1.88 insn per cycle - 2.783650122 seconds time elapsed +TOTAL : 2.727085 sec + 8,130,917,009 cycles # 2.977 GHz + 15,244,999,510 instructions # 1.87 insn per cycle + 2.732127705 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2423) (512y: 8) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.583024e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.982786e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.982786e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.551006e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.930183e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.930183e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.793855 sec - 6,612,725,918 cycles # 2.363 GHz - 12,851,623,569 instructions # 1.94 insn per cycle - 2.799020549 seconds time elapsed +TOTAL : 2.826162 sec + 6,610,785,893 cycles # 2.336 GHz + 12,848,595,223 instructions # 1.94 insn per cycle + 2.831354272 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1705) (512y: 18) (512z: 1427) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index b5507320b6..1f616951f6 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:24:23 +DATE: 2023-11-09_17:59:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.295762e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.181123e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.251991e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.379623e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.224230e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.277206e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.577748 sec - 2,371,472,909 cycles # 2.938 GHz - 3,662,215,838 instructions # 1.54 insn per cycle - 0.866645313 seconds time elapsed +TOTAL : 0.567867 sec + 2,380,227,304 cycles # 3.011 GHz + 3,716,615,660 instructions # 1.56 insn per cycle + 0.847985852 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 117 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.709669e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.230063e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.230063e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.702473e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.225484e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.225484e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 4.043238 sec - 12,201,253,013 cycles # 3.016 GHz - 32,520,928,331 instructions # 2.67 insn per cycle - 4.048480591 seconds time elapsed +TOTAL : 4.053519 sec + 12,216,293,497 cycles # 3.011 GHz + 32,522,254,109 instructions # 2.66 insn per cycle + 4.058663851 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 312) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.776736e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.688717e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.688717e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.830691e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.806288e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.806288e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.629894 sec - 8,006,523,859 cycles # 3.039 GHz - 18,689,561,969 instructions # 2.33 insn per cycle - 2.635155805 seconds time elapsed +TOTAL : 2.580563 sec + 7,975,462,428 cycles # 3.085 GHz + 18,690,132,924 instructions # 2.34 insn per cycle + 2.585721810 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1554) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.876319e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.776118e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.776118e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.931453e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.867355e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.867355e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.544972 sec - 7,483,863,921 cycles # 2.935 GHz - 14,252,784,118 instructions # 1.90 insn per cycle - 2.550249205 seconds time elapsed +TOTAL : 2.497040 sec + 7,461,995,802 cycles # 2.983 GHz + 14,254,175,720 instructions # 1.91 insn per cycle + 2.502220546 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.940665e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.960644e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.960644e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.990445e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.025789e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.025789e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.495422 sec - 7,326,781,172 cycles # 2.931 GHz - 13,945,833,508 instructions # 1.90 insn per cycle - 2.500698244 seconds time elapsed +TOTAL : 2.453022 sec + 7,312,763,088 cycles # 2.976 GHz + 13,952,233,674 instructions # 1.91 insn per cycle + 2.458314250 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 3) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.636740e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.108198e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.108198e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.649642e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.141006e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.141006e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.746264 sec - 6,527,138,912 cycles # 2.373 GHz - 13,421,028,013 instructions # 2.06 insn per cycle - 2.751679406 seconds time elapsed +TOTAL : 2.733236 sec + 6,541,090,853 cycles # 2.390 GHz + 13,422,969,862 instructions # 2.05 insn per cycle + 2.738380923 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2071) (512y: 1) (512z: 1198) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index b6c42e0895..374f2a331e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:24:50 +DATE: 2023-11-09_17:59:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.300995e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.194789e+09 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.295764e+09 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.383788e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.237025e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.315197e+09 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.573687 sec - 2,396,122,888 cycles # 2.957 GHz - 3,709,386,643 instructions # 1.55 insn per cycle - 0.867525381 seconds time elapsed +TOTAL : 0.566447 sec + 2,356,919,781 cycles # 2.991 GHz + 3,683,739,571 instructions # 1.56 insn per cycle + 0.846741071 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 95 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.274435e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.306451e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.306451e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.320968e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.384461e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.384461e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.128769 sec - 9,423,056,878 cycles # 3.008 GHz - 25,306,341,141 instructions # 2.69 insn per cycle - 3.134038482 seconds time elapsed +TOTAL : 3.069124 sec + 9,404,467,335 cycles # 3.060 GHz + 25,307,412,416 instructions # 2.69 insn per cycle + 3.074433972 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.099658e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.759584e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.759584e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.164094e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.875777e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.875777e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.397339 sec - 7,201,211,606 cycles # 2.998 GHz - 16,901,413,977 instructions # 2.35 insn per cycle - 2.402789017 seconds time elapsed +TOTAL : 2.347070 sec + 7,183,873,212 cycles # 3.055 GHz + 16,901,716,244 instructions # 2.35 insn per cycle + 2.352401841 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.019910e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.199492e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.199492e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.103853e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.343298e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.343298e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.443323 sec - 7,147,435,963 cycles # 2.920 GHz - 13,619,110,670 instructions # 1.91 insn per cycle - 2.448969091 seconds time elapsed +TOTAL : 2.377491 sec + 7,114,519,285 cycles # 2.987 GHz + 13,619,081,744 instructions # 1.91 insn per cycle + 2.382536600 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2060) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.050148e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.307582e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.307582e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.131276e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.434861e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.434861e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.423418 sec - 7,082,396,314 cycles # 2.918 GHz - 13,431,226,521 instructions # 1.90 insn per cycle - 2.429141482 seconds time elapsed +TOTAL : 2.360462 sec + 7,057,553,337 cycles # 2.985 GHz + 13,435,682,624 instructions # 1.90 insn per cycle + 2.365710938 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1945) (512y: 4) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.725279e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.338904e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.338904e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.814153e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.521058e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.521058e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.669392 sec - 6,366,623,257 cycles # 2.381 GHz - 13,153,230,984 instructions # 2.07 insn per cycle - 2.674848562 seconds time elapsed +TOTAL : 2.589000 sec + 6,345,330,255 cycles # 2.447 GHz + 13,153,121,215 instructions # 2.07 insn per cycle + 2.594408710 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2029) (512y: 1) (512z: 1083) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 40be1e0fe4..8dc3126453 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:02:23 +DATE: 2023-11-09_17:38:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.986561e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.920506e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.026737e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.618205e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.831793e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.977288e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.651585 sec - 2,613,210,290 cycles # 2.977 GHz - 4,026,633,947 instructions # 1.54 insn per cycle - 0.940304085 seconds time elapsed +TOTAL : 0.652821 sec + 2,648,283,165 cycles # 3.003 GHz + 4,101,874,172 instructions # 1.55 insn per cycle + 0.942277172 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 @@ -77,14 +77,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.098312e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.283308e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.283308e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.110302e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.297006e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.297006e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.126587 sec - 18,732,621,094 cycles # 3.056 GHz - 44,288,636,649 instructions # 2.36 insn per cycle - 6.131702524 seconds time elapsed +TOTAL : 6.061696 sec + 18,702,058,697 cycles # 3.083 GHz + 44,286,744,373 instructions # 2.37 insn per cycle + 6.066885580 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.724748e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.279623e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.279623e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.748205e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.315149e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.315149e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.052368 sec - 12,345,078,225 cycles # 3.044 GHz - 30,962,385,061 instructions # 2.51 insn per cycle - 4.057665704 seconds time elapsed +TOTAL : 3.994898 sec + 12,345,141,895 cycles # 3.087 GHz + 30,960,600,041 instructions # 2.51 insn per cycle + 4.000031168 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1685) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.012805e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.801799e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.801799e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.024705e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.805066e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.805066e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.527503 sec - 10,105,777,222 cycles # 2.861 GHz - 19,402,091,411 instructions # 1.92 insn per cycle - 3.532885933 seconds time elapsed +TOTAL : 3.505414 sec + 10,100,327,501 cycles # 2.878 GHz + 19,399,870,617 instructions # 1.92 insn per cycle + 3.510718654 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2146) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.136223e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.011490e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.011490e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.175175e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.066367e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.066367e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.337554 sec - 9,780,270,182 cycles # 2.927 GHz - 18,984,447,401 instructions # 1.94 insn per cycle - 3.342834380 seconds time elapsed +TOTAL : 3.279954 sec + 9,681,673,426 cycles # 2.948 GHz + 18,969,865,921 instructions # 1.96 insn per cycle + 3.285422855 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1859) (512y: 188) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.916274e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.582982e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.582982e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.948024e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.629123e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.629123e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.678279 sec - 8,374,553,290 cycles # 2.274 GHz - 15,066,979,076 instructions # 1.80 insn per cycle - 3.683518796 seconds time elapsed +TOTAL : 3.620894 sec + 8,364,739,572 cycles # 2.308 GHz + 15,064,814,645 instructions # 1.80 insn per cycle + 3.626218437 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1023) (512y: 155) (512z: 1316) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index d0448f95d2..a2d87f5da8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2023-11-03_19:02:57 +DATE: 2023-11-09_17:39:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.995389e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.942657e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.069355e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.632265e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.861047e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.036900e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.648218 sec - 2,577,449,374 cycles # 2.937 GHz - 3,930,119,139 instructions # 1.52 insn per cycle - 0.934838617 seconds time elapsed +TOTAL : 0.649714 sec + 2,641,937,888 cycles # 3.008 GHz + 4,107,555,428 instructions # 1.55 insn per cycle + 0.938941535 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 @@ -77,14 +77,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.138539e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.340756e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.340756e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.158637e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.370951e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.370951e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 5.930169 sec - 17,940,598,550 cycles # 3.023 GHz - 42,539,439,563 instructions # 2.37 insn per cycle - 5.935391018 seconds time elapsed +TOTAL : 5.823305 sec + 18,013,373,486 cycles # 3.091 GHz + 42,535,982,962 instructions # 2.36 insn per cycle + 5.828417378 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 421) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.737380e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.320541e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.320541e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.770599e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.353490e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.353490e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.022351 sec - 12,179,829,023 cycles # 3.025 GHz - 30,269,422,152 instructions # 2.49 insn per cycle - 4.027705928 seconds time elapsed +TOTAL : 3.950045 sec + 12,171,205,402 cycles # 3.078 GHz + 30,268,628,414 instructions # 2.49 insn per cycle + 3.955313835 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.003006e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.791277e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.791277e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.099406e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.925166e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.925166e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.544763 sec - 10,086,483,930 cycles # 2.843 GHz - 19,285,075,836 instructions # 1.91 insn per cycle - 3.550049339 seconds time elapsed +TOTAL : 3.385614 sec + 10,033,748,773 cycles # 2.960 GHz + 19,281,534,051 instructions # 1.92 insn per cycle + 3.390768328 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2162) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.153713e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.048947e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.048947e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.135260e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.020042e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.020042e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.313722 sec - 9,652,564,948 cycles # 2.909 GHz - 18,773,850,855 instructions # 1.94 insn per cycle - 3.319022077 seconds time elapsed +TOTAL : 3.343797 sec + 9,615,342,352 cycles # 2.872 GHz + 18,771,093,665 instructions # 1.95 insn per cycle + 3.349067283 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1833) (512y: 191) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.911178e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.576380e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.576380e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.965653e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.666391e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.666391e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.691490 sec - 8,274,258,282 cycles # 2.239 GHz - 14,991,882,108 instructions # 1.81 insn per cycle - 3.696773496 seconds time elapsed +TOTAL : 3.592114 sec + 8,278,170,966 cycles # 2.302 GHz + 14,988,534,751 instructions # 1.81 insn per cycle + 3.597402233 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1020) (512y: 156) (512z: 1305) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index ecfe1f9032..dad81481e1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:03:30 +DATE: 2023-11-09_17:39:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.269149e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.178306e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270483e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.113101e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.178068e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274620e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.515028 sec - 2,190,362,135 cycles # 2.945 GHz - 3,134,430,746 instructions # 1.43 insn per cycle - 0.801320986 seconds time elapsed +TOTAL : 0.513513 sec + 2,238,779,994 cycles # 3.016 GHz + 3,236,054,047 instructions # 1.45 insn per cycle + 0.800586540 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.141790e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.204663e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.204663e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.199296e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.263095e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.263095e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.001947 sec - 15,160,921,453 cycles # 3.029 GHz - 38,440,320,018 instructions # 2.54 insn per cycle - 5.007262329 seconds time elapsed +TOTAL : 4.870986 sec + 15,138,095,755 cycles # 3.105 GHz + 38,436,824,615 instructions # 2.54 insn per cycle + 4.876178872 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.537912e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.729582e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.729582e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.669942e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.869262e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.869262e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.070180 sec - 9,135,564,109 cycles # 2.971 GHz - 24,595,068,911 instructions # 2.69 insn per cycle - 3.075510770 seconds time elapsed +TOTAL : 2.960626 sec + 9,095,550,717 cycles # 3.068 GHz + 24,591,504,229 instructions # 2.70 insn per cycle + 2.966139239 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.794659e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.298456e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.298456e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.803896e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.327557e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.327557e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.915155 sec - 5,488,800,341 cycles # 2.860 GHz - 11,269,289,809 instructions # 2.05 insn per cycle - 1.920562747 seconds time elapsed +TOTAL : 1.909794 sec + 5,486,817,505 cycles # 2.866 GHz + 11,265,648,347 instructions # 2.05 insn per cycle + 1.915029323 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.465243e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.099655e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.099655e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.555272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.195980e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.195980e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.726047 sec - 4,948,464,581 cycles # 2.859 GHz - 10,575,268,094 instructions # 2.14 insn per cycle - 1.731560491 seconds time elapsed +TOTAL : 1.704245 sec + 4,927,847,485 cycles # 2.884 GHz + 10,572,013,859 instructions # 2.15 insn per cycle + 1.709455619 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.977744e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.204839e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.204839e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.103362e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.341522e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.341522e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.740172 sec - 5,379,659,738 cycles # 1.960 GHz - 7,808,789,832 instructions # 1.45 insn per cycle - 2.745493260 seconds time elapsed +TOTAL : 2.658432 sec + 5,379,828,238 cycles # 2.021 GHz + 7,805,118,346 instructions # 1.45 insn per cycle + 2.663615123 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index dd2f256477..d089f3ea80 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:35:20 +DATE: 2023-11-09_18:10:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.496633e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.880527e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.880527e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.436618e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.989585e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.989585e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.808083 sec - 3,120,895,454 cycles # 2.971 GHz - 4,726,889,577 instructions # 1.51 insn per cycle - 1.107972527 seconds time elapsed +TOTAL : 0.820320 sec + 3,087,525,024 cycles # 2.881 GHz + 4,797,416,225 instructions # 1.55 insn per cycle + 1.129126082 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -86,14 +86,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.117962e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.179706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.179706e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.137936e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.202451e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.202451e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.135527 sec - 15,504,544,823 cycles # 3.016 GHz - 38,497,224,440 instructions # 2.48 insn per cycle - 5.142229259 seconds time elapsed +TOTAL : 5.088780 sec + 15,506,176,025 cycles # 3.045 GHz + 38,500,320,484 instructions # 2.48 insn per cycle + 5.095207532 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -114,14 +114,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.595756e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.790745e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.790745e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.664205e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.863051e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.863051e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.098715 sec - 9,432,801,004 cycles # 3.038 GHz - 24,773,895,780 instructions # 2.63 insn per cycle - 3.105439323 seconds time elapsed +TOTAL : 3.042056 sec + 9,436,538,509 cycles # 3.096 GHz + 24,774,730,249 instructions # 2.63 insn per cycle + 3.048601444 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -142,14 +142,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.527781e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.981315e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.981315e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.821161e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.311886e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.311886e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.098555 sec - 5,826,323,105 cycles # 2.789 GHz - 11,554,423,664 instructions # 1.98 insn per cycle - 2.105206679 seconds time elapsed +TOTAL : 1.984151 sec + 5,841,767,961 cycles # 2.936 GHz + 11,552,228,699 instructions # 1.98 insn per cycle + 1.990639911 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -170,14 +170,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.300396e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.893264e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.893264e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.505257e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.122209e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.122209e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.849117 sec - 5,294,307,248 cycles # 2.854 GHz - 10,856,382,305 instructions # 2.05 insn per cycle - 1.855861110 seconds time elapsed +TOTAL : 1.793114 sec + 5,293,839,115 cycles # 2.943 GHz + 10,856,913,242 instructions # 2.05 insn per cycle + 1.799607546 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -198,14 +198,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.891057e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.111611e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.111611e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.021313e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.250852e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.250852e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.882235 sec - 5,742,873,090 cycles # 1.988 GHz - 8,048,787,968 instructions # 1.40 insn per cycle - 2.889049440 seconds time elapsed +TOTAL : 2.791071 sec + 5,762,529,693 cycles # 2.060 GHz + 8,048,857,986 instructions # 1.40 insn per cycle + 2.797719094 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 70c42f96ca..d4092f872a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:48:21 +DATE: 2023-11-09_18:22:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.579966e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.154296e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270387e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.736311e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.160845e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.271332e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.619804 sec - 2,500,171,473 cycles # 2.947 GHz - 3,610,462,854 instructions # 1.44 insn per cycle - 0.906022247 seconds time elapsed +TOTAL : 0.616077 sec + 2,487,675,163 cycles # 2.949 GHz + 3,609,155,412 instructions # 1.45 insn per cycle + 0.900867999 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.141469e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.204103e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.204103e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.176864e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.240941e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.240941e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.061672 sec - 15,345,417,554 cycles # 3.029 GHz - 38,452,483,858 instructions # 2.51 insn per cycle - 5.067127392 seconds time elapsed +TOTAL : 4.979715 sec + 15,323,819,271 cycles # 3.075 GHz + 38,452,992,607 instructions # 2.51 insn per cycle + 4.984901972 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.594441e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.787517e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.787517e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.677729e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.878488e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.878488e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.081938 sec - 9,306,122,505 cycles # 3.015 GHz - 24,590,602,612 instructions # 2.64 insn per cycle - 3.087467598 seconds time elapsed +TOTAL : 3.013873 sec + 9,290,869,776 cycles # 3.079 GHz + 24,592,367,735 instructions # 2.65 insn per cycle + 3.019043179 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.780444e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.284766e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.284766e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.850559e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.370312e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.370312e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.978919 sec - 5,659,108,727 cycles # 2.853 GHz - 11,248,307,846 instructions # 1.99 insn per cycle - 1.984493875 seconds time elapsed +TOTAL : 1.954495 sec + 5,685,208,050 cycles # 2.902 GHz + 11,247,975,749 instructions # 1.98 insn per cycle + 1.959795584 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.409554e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.043503e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.043503e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.607127e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.248201e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.248201e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.801971 sec - 5,131,678,035 cycles # 2.841 GHz - 10,518,217,961 instructions # 2.05 insn per cycle - 1.807387516 seconds time elapsed +TOTAL : 1.748348 sec + 5,124,696,849 cycles # 2.923 GHz + 10,520,869,381 instructions # 2.05 insn per cycle + 1.753705732 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.952294e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.178919e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.178919e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.874391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.086224e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.086224e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.820832 sec - 5,565,619,645 cycles # 1.970 GHz - 7,754,617,723 instructions # 1.39 insn per cycle - 2.826352548 seconds time elapsed +TOTAL : 2.872375 sec + 5,588,777,867 cycles # 1.950 GHz + 7,758,258,898 instructions # 1.39 insn per cycle + 2.877703247 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index 4837b41444..b9b046957a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:45:01 +DATE: 2023-11-09_18:19:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.583777e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.154968e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.271096e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.737213e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.157401e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270983e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.557101 sec - 2,322,977,037 cycles # 2.953 GHz - 3,599,423,025 instructions # 1.55 insn per cycle - 0.843882316 seconds time elapsed +TOTAL : 0.551870 sec + 2,343,082,954 cycles # 3.005 GHz + 3,662,705,915 instructions # 1.56 insn per cycle + 0.837059271 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.134010e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.196717e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.196717e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.189334e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.253549e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.253549e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.022340 sec - 15,161,844,495 cycles # 3.017 GHz - 38,436,020,868 instructions # 2.54 insn per cycle - 5.028057319 seconds time elapsed +TOTAL : 4.893645 sec + 15,145,823,463 cycles # 3.092 GHz + 38,436,891,323 instructions # 2.54 insn per cycle + 4.899128465 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.611425e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.807723e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.807723e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.701689e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.903671e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.903671e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.009043 sec - 9,092,248,013 cycles # 3.018 GHz - 24,590,993,356 instructions # 2.70 insn per cycle - 3.014816078 seconds time elapsed +TOTAL : 2.937292 sec + 9,090,406,845 cycles # 3.091 GHz + 24,590,949,325 instructions # 2.71 insn per cycle + 2.942627315 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.765157e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.263695e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.263695e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.932093e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.448459e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.448459e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.924911 sec - 5,492,799,049 cycles # 2.847 GHz - 11,264,994,094 instructions # 2.05 insn per cycle - 1.930399853 seconds time elapsed +TOTAL : 1.870782 sec + 5,477,596,736 cycles # 2.921 GHz + 11,265,174,730 instructions # 2.06 insn per cycle + 1.876089705 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.461458e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.086226e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.086226e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.470328e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.111006e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.111006e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.728063 sec - 4,951,669,022 cycles # 2.858 GHz - 10,569,075,843 instructions # 2.13 insn per cycle - 1.733593807 seconds time elapsed +TOTAL : 1.725707 sec + 4,951,306,612 cycles # 2.866 GHz + 10,571,555,034 instructions # 2.14 insn per cycle + 1.731137280 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.938989e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.163796e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.163796e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.944088e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.162238e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162238e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.768049 sec - 5,404,539,268 cycles # 1.950 GHz - 7,804,733,779 instructions # 1.44 insn per cycle - 2.773480694 seconds time elapsed +TOTAL : 2.762921 sec + 5,392,276,499 cycles # 1.949 GHz + 7,806,030,768 instructions # 1.45 insn per cycle + 2.768347372 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 04f32ac3bc..655f8b81f2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:41:43 +DATE: 2023-11-09_18:16:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.845624e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.154000e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.267501e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.038584e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.158740e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.268341e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.705622 sec - 2,764,377,825 cycles # 2.955 GHz - 4,322,445,800 instructions # 1.56 insn per cycle - 0.992638570 seconds time elapsed +TOTAL : 0.696596 sec + 2,778,143,738 cycles # 3.016 GHz + 4,350,451,856 instructions # 1.57 insn per cycle + 0.980250782 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,14 +79,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.118266e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.179189e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.179189e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.182446e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.245839e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.245839e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.055003 sec - 15,355,352,228 cycles # 3.035 GHz - 38,436,037,499 instructions # 2.50 insn per cycle - 5.060369145 seconds time elapsed +TOTAL : 4.909547 sec + 15,150,996,904 cycles # 3.083 GHz + 38,436,637,567 instructions # 2.54 insn per cycle + 4.914838193 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest.exe @@ -106,14 +106,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.619308e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.814626e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.814626e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.688279e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.888858e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.888858e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.002993 sec - 9,098,824,080 cycles # 3.025 GHz - 24,590,228,698 instructions # 2.70 insn per cycle - 3.008485414 seconds time elapsed +TOTAL : 2.947447 sec + 9,111,190,675 cycles # 3.087 GHz + 24,590,939,294 instructions # 2.70 insn per cycle + 2.952793630 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2156) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest.exe @@ -133,14 +133,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.738465e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.252767e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.252767e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.931624e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.461725e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.461725e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.934521 sec - 5,491,674,204 cycles # 2.833 GHz - 11,265,170,941 instructions # 2.05 insn per cycle - 1.939950087 seconds time elapsed +TOTAL : 1.871366 sec + 5,440,450,573 cycles # 2.900 GHz + 11,265,206,629 instructions # 2.07 insn per cycle + 1.876659163 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2376) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.341479e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.957193e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.957193e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.623582e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.268733e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.268733e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.759206 sec - 4,958,873,003 cycles # 2.811 GHz - 10,570,272,367 instructions # 2.13 insn per cycle - 1.764825335 seconds time elapsed +TOTAL : 1.687174 sec + 4,939,929,910 cycles # 2.920 GHz + 10,570,291,125 instructions # 2.14 insn per cycle + 1.692619999 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2077) (512y: 144) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest.exe @@ -187,14 +187,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.934828e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.158501e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.158501e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.058474e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.295667e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.295667e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.772888 sec - 5,409,288,056 cycles # 1.948 GHz - 7,806,084,388 instructions # 1.44 insn per cycle - 2.778257755 seconds time elapsed +TOTAL : 2.687752 sec + 5,409,737,421 cycles # 2.010 GHz + 7,805,529,138 instructions # 1.44 insn per cycle + 2.693129228 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1446) (512y: 122) (512z: 1542) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 4e3b221e19..e703e9e5d5 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:03:57 +DATE: 2023-11-09_17:40:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.258167e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.174363e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.266024e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.110180e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.174406e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270579e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.515882 sec - 2,147,525,845 cycles # 2.877 GHz - 3,086,933,024 instructions # 1.44 insn per cycle - 0.803849250 seconds time elapsed +TOTAL : 0.513301 sec + 2,237,705,656 cycles # 3.016 GHz + 3,206,861,926 instructions # 1.43 insn per cycle + 0.799816578 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.170531e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.234097e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.234097e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.213184e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.278334e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.278334e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.935479 sec - 15,016,135,362 cycles # 3.040 GHz - 40,166,123,209 instructions # 2.67 insn per cycle - 4.940913654 seconds time elapsed +TOTAL : 4.840849 sec + 15,026,294,462 cycles # 3.101 GHz + 40,163,846,165 instructions # 2.67 insn per cycle + 4.846092672 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.815308e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.035943e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.035943e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.848578e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.068499e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.068499e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.853658 sec - 8,679,305,567 cycles # 3.037 GHz - 23,688,803,932 instructions # 2.73 insn per cycle - 2.859362026 seconds time elapsed +TOTAL : 2.827567 sec + 8,771,607,406 cycles # 3.097 GHz + 23,683,918,687 instructions # 2.70 insn per cycle + 2.832818835 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2069) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.201194e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.599502e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.599502e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.290749e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.696907e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.696907e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.119971 sec - 6,076,924,812 cycles # 2.860 GHz - 13,078,281,182 instructions # 2.15 insn per cycle - 2.125352086 seconds time elapsed +TOTAL : 2.084491 sec + 6,075,216,707 cycles # 2.908 GHz + 13,074,699,153 instructions # 2.15 insn per cycle + 2.089762357 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2546) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.478450e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.920522e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.920522e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.571274e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.025491e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.025491e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.017570 sec - 5,787,274,892 cycles # 2.862 GHz - 12,336,105,279 instructions # 2.13 insn per cycle - 2.023012261 seconds time elapsed +TOTAL : 1.983621 sec + 5,795,280,725 cycles # 2.915 GHz + 12,334,890,295 instructions # 2.13 insn per cycle + 1.988789955 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2096) (512y: 294) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.519779e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.701184e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.701184e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.706784e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.899846e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.899846e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.086221 sec - 5,817,765,621 cycles # 1.888 GHz - 9,621,068,231 instructions # 1.65 insn per cycle - 3.091564620 seconds time elapsed +TOTAL : 2.932528 sec + 5,816,798,800 cycles # 1.981 GHz + 9,613,398,484 instructions # 1.65 insn per cycle + 2.938057800 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1510) (512y: 209) (512z: 1971) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 3337c01ad4..a5c5a0c704 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:25:16 +DATE: 2023-11-09_17:59:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.554755e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155174e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.268743e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.735374e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.165776e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.275136e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.526687 sec - 2,250,994,801 cycles # 2.926 GHz - 3,097,737,524 instructions # 1.38 insn per cycle - 0.826717654 seconds time elapsed +TOTAL : 0.522181 sec + 2,183,845,501 cycles # 2.897 GHz + 3,063,497,760 instructions # 1.40 insn per cycle + 0.813008083 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.473532e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.556761e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.556761e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.487456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.573222e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.573222e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.344783 sec - 13,019,193,404 cycles # 2.993 GHz - 34,405,663,599 instructions # 2.64 insn per cycle - 4.350365607 seconds time elapsed +TOTAL : 4.321242 sec + 13,015,032,492 cycles # 3.009 GHz + 34,406,787,342 instructions # 2.64 insn per cycle + 4.326519493 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 686) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.104680e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.249620e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.249620e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.121956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.266333e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.266333e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.483866 sec - 10,607,531,951 cycles # 3.041 GHz - 24,022,392,993 instructions # 2.26 insn per cycle - 3.489298956 seconds time elapsed +TOTAL : 3.465165 sec + 10,606,115,107 cycles # 3.057 GHz + 24,023,886,202 instructions # 2.27 insn per cycle + 3.470527002 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.787875e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.125865e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.125865e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.813993e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.151107e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.151107e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.295291 sec - 6,588,895,934 cycles # 2.865 GHz - 12,413,954,044 instructions # 1.88 insn per cycle - 2.300926049 seconds time elapsed +TOTAL : 2.282824 sec + 6,624,207,523 cycles # 2.896 GHz + 12,414,593,585 instructions # 1.87 insn per cycle + 2.288220203 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3156) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.072251e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.445053e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.445053e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.113256e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.489865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.489865e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.171777 sec - 6,238,931,665 cycles # 2.866 GHz - 11,585,660,605 instructions # 1.86 insn per cycle - 2.177410338 seconds time elapsed +TOTAL : 2.154567 sec + 6,244,302,737 cycles # 2.892 GHz + 11,586,784,905 instructions # 1.86 insn per cycle + 2.160119888 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2692) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.998110e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.229600e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.229600e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.080168e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.315597e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.315597e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.727363 sec - 5,337,713,756 cycles # 1.954 GHz - 9,308,309,205 instructions # 1.74 insn per cycle - 2.732896997 seconds time elapsed +TOTAL : 2.674256 sec + 5,337,021,373 cycles # 1.992 GHz + 9,309,292,596 instructions # 1.74 insn per cycle + 2.679621915 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2116) (512y: 282) (512z: 1958) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 64e33308d5..04c22c3970 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:25:43 +DATE: 2023-11-09_18:00:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.571117e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.157677e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270835e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.730812e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.162658e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.271522e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.523342 sec - 2,241,527,426 cycles # 2.944 GHz - 3,209,964,665 instructions # 1.43 insn per cycle - 0.819917937 seconds time elapsed +TOTAL : 0.517390 sec + 2,237,231,843 cycles # 2.985 GHz + 3,219,482,821 instructions # 1.44 insn per cycle + 0.806478536 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -77,14 +77,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.658099e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.754988e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.754988e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.686328e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.783817e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.783817e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.049112 sec - 12,374,606,485 cycles # 3.053 GHz - 35,058,016,337 instructions # 2.83 insn per cycle - 4.054549094 seconds time elapsed +TOTAL : 4.006446 sec + 12,372,456,833 cycles # 3.085 GHz + 35,059,205,099 instructions # 2.83 insn per cycle + 4.011874603 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 457) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.088523e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.231607e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.231607e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.113185e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.255090e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.255090e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.500477 sec - 10,694,410,777 cycles # 3.051 GHz - 23,099,336,289 instructions # 2.16 insn per cycle - 3.506159729 seconds time elapsed +TOTAL : 3.471336 sec + 10,684,507,667 cycles # 3.074 GHz + 23,099,965,959 instructions # 2.16 insn per cycle + 3.476724591 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2363) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.105721e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.492220e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.492220e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.172732e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.564192e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.564192e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.158641 sec - 6,163,495,994 cycles # 2.849 GHz - 11,969,488,967 instructions # 1.94 insn per cycle - 2.164367762 seconds time elapsed +TOTAL : 2.130496 sec + 6,169,121,187 cycles # 2.891 GHz + 11,970,628,399 instructions # 1.94 insn per cycle + 2.136000238 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2511) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.169198e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.571659e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.571659e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.314737e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.728928e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.728928e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.133549 sec - 6,039,094,179 cycles # 2.824 GHz - 11,144,077,781 instructions # 1.85 insn per cycle - 2.139096234 seconds time elapsed +TOTAL : 2.076859 sec + 6,006,071,025 cycles # 2.885 GHz + 11,143,550,799 instructions # 1.86 insn per cycle + 2.082481137 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2128) (512y: 174) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.003701e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.233597e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.233597e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.186490e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.434908e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.434908e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.726476 sec - 5,224,063,612 cycles # 1.913 GHz - 9,034,702,359 instructions # 1.73 insn per cycle - 2.732050023 seconds time elapsed +TOTAL : 2.608202 sec + 5,201,388,823 cycles # 1.991 GHz + 9,034,449,537 instructions # 1.74 insn per cycle + 2.613510222 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1651) (512y: 208) (512z: 1567) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 8d92c550fe..b055a915bb 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:04:25 +DATE: 2023-11-09_17:40:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.099342e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.699387e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.953526e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.058988e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.701786e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.976764e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.471293 sec - 2,042,101,644 cycles # 2.948 GHz - 2,946,816,826 instructions # 1.44 insn per cycle - 0.749881107 seconds time elapsed +TOTAL : 0.470897 sec + 2,078,401,117 cycles # 3.001 GHz + 2,953,650,991 instructions # 1.42 insn per cycle + 0.749721776 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.296642e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.371475e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.371475e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.334914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.410542e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.410542e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.650028 sec - 14,160,157,406 cycles # 3.043 GHz - 38,398,040,352 instructions # 2.71 insn per cycle - 4.655270250 seconds time elapsed +TOTAL : 4.574332 sec + 14,151,959,917 cycles # 3.091 GHz + 38,392,913,322 instructions # 2.71 insn per cycle + 4.579307325 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.139917e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.562152e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.562152e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.213719e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.641599e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.641599e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.124632 sec - 6,476,959,128 cycles # 3.042 GHz - 15,834,256,517 instructions # 2.44 insn per cycle - 2.129768462 seconds time elapsed +TOTAL : 2.094684 sec + 6,471,158,629 cycles # 3.083 GHz + 15,829,971,957 instructions # 2.45 insn per cycle + 2.099849038 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.088663e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.043198e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.043198e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.559598e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.101002e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.101002e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.237397 sec - 3,465,504,689 cycles # 2.794 GHz - 7,611,207,779 instructions # 2.20 insn per cycle - 1.242588855 seconds time elapsed +TOTAL : 1.179944 sec + 3,466,899,201 cycles # 2.927 GHz + 7,607,183,710 instructions # 2.19 insn per cycle + 1.185084453 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.457008e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.096549e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.096549e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.023293e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.190211e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.190211e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.196326 sec - 3,247,822,045 cycles # 2.704 GHz - 7,220,309,293 instructions # 2.22 insn per cycle - 1.201704693 seconds time elapsed +TOTAL : 1.106259 sec + 3,248,324,558 cycles # 2.924 GHz + 7,215,751,749 instructions # 2.22 insn per cycle + 1.111467205 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.679715e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.389169e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.389169e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.338108e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.142577e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.142577e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.658315 sec - 3,062,288,257 cycles # 1.842 GHz - 5,850,668,317 instructions # 1.91 insn per cycle - 1.663822965 seconds time elapsed +TOTAL : 1.512902 sec + 3,068,145,100 cycles # 2.024 GHz + 5,846,808,445 instructions # 1.91 insn per cycle + 1.518114660 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index a1ebef89d2..b4b4f0117a 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:35:48 +DATE: 2023-11-09_18:10:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.064201e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.498245e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.498245e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.332495e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.768677e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.768677e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.670260 sec - 2,637,877,021 cycles # 2.942 GHz - 4,088,256,570 instructions # 1.55 insn per cycle - 0.955124097 seconds time elapsed +TOTAL : 0.657541 sec + 2,664,976,053 cycles # 3.017 GHz + 4,137,029,639 instructions # 1.55 insn per cycle + 0.940709573 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Bridge selected: cannot use RamboDevice, will use RamboHost @@ -86,14 +86,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.270912e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.344925e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.344925e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.284632e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.359311e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359311e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.744982 sec - 14,378,860,027 cycles # 3.027 GHz - 38,435,472,086 instructions # 2.67 insn per cycle - 4.751370421 seconds time elapsed +TOTAL : 4.717410 sec + 14,339,509,352 cycles # 3.036 GHz + 38,436,261,270 instructions # 2.68 insn per cycle + 4.723588153 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -114,14 +114,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.017460e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.422989e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.422989e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.161401e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.579571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.579571e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.222354 sec - 6,685,137,863 cycles # 3.001 GHz - 16,109,819,565 instructions # 2.41 insn per cycle - 2.228696460 seconds time elapsed +TOTAL : 2.160392 sec + 6,674,034,151 cycles # 3.082 GHz + 16,110,239,223 instructions # 2.41 insn per cycle + 2.166483007 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -142,14 +142,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.204872e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.057185e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.057185e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.368587e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.075649e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.075649e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.267912 sec - 3,665,496,802 cycles # 2.878 GHz - 7,843,464,752 instructions # 2.14 insn per cycle - 1.274414413 seconds time elapsed +TOTAL : 1.245937 sec + 3,665,898,836 cycles # 2.929 GHz + 7,844,268,726 instructions # 2.14 insn per cycle + 1.252070096 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -170,14 +170,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.639653e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.116975e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.116975e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.007320e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.169448e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.169448e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.220373 sec - 3,444,640,052 cycles # 2.810 GHz - 7,451,522,975 instructions # 2.16 insn per cycle - 1.226715796 seconds time elapsed +TOTAL : 1.168797 sec + 3,453,510,139 cycles # 2.941 GHz + 7,453,168,499 instructions # 2.16 insn per cycle + 1.174935345 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -198,14 +198,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.178040e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.972638e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.972638e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.465484e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.304262e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.304262e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.593853 sec - 3,283,201,976 cycles # 2.053 GHz - 6,099,788,393 instructions # 1.86 insn per cycle - 1.600161746 seconds time elapsed +TOTAL : 1.534111 sec + 3,274,248,388 cycles # 2.127 GHz + 6,100,577,921 instructions # 1.86 insn per cycle + 1.540213764 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index b7fb0d6959..375a817a79 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:48:49 +DATE: 2023-11-09_18:23:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.431152e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.624289e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.946132e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.824516e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.637814e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.946525e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.564134 sec - 2,302,613,621 cycles # 2.942 GHz - 3,377,451,746 instructions # 1.47 insn per cycle - 0.841499880 seconds time elapsed +TOTAL : 0.570368 sec + 2,261,358,530 cycles # 2.855 GHz + 3,305,358,456 instructions # 1.46 insn per cycle + 0.849017060 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --common WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.289992e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.364715e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.364715e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.325738e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.401234e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.401234e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 4.718949 sec - 14,318,249,819 cycles # 3.032 GHz - 38,421,429,911 instructions # 2.68 insn per cycle - 4.724102129 seconds time elapsed +TOTAL : 4.645582 sec + 14,325,809,375 cycles # 3.082 GHz + 38,422,987,894 instructions # 2.68 insn per cycle + 4.650648560 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.077786e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.487595e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.487595e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.201320e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.630090e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.630090e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.204441 sec - 6,639,814,735 cycles # 3.006 GHz - 15,841,902,427 instructions # 2.39 insn per cycle - 2.209539727 seconds time elapsed +TOTAL : 2.152860 sec + 6,643,060,083 cycles # 3.080 GHz + 15,842,584,477 instructions # 2.38 insn per cycle + 2.158023571 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.307822e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.070999e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.070999e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.450401e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.089441e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089441e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.265035 sec - 3,649,285,785 cycles # 2.875 GHz - 7,591,137,573 instructions # 2.08 insn per cycle - 1.270319196 seconds time elapsed +TOTAL : 1.246774 sec + 3,643,683,352 cycles # 2.913 GHz + 7,592,040,005 instructions # 2.08 insn per cycle + 1.251723719 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.974832e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.160037e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.160037e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.014057e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.180349e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.180349e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.191816 sec - 3,426,519,284 cycles # 2.864 GHz - 7,166,067,248 instructions # 2.09 insn per cycle - 1.197132868 seconds time elapsed +TOTAL : 1.172023 sec + 3,431,252,645 cycles # 2.917 GHz + 7,165,511,136 instructions # 2.09 insn per cycle + 1.177142051 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.265683e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.068951e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.068951e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.431571e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.259454e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.259454e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.584018 sec - 3,241,188,093 cycles # 2.041 GHz - 5,795,628,367 instructions # 1.79 insn per cycle - 1.589192883 seconds time elapsed +TOTAL : 1.550630 sec + 3,238,644,111 cycles # 2.083 GHz + 5,796,702,494 instructions # 1.79 insn per cycle + 1.555869344 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index 30f4fadf92..573aa8a1a6 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:45:28 +DATE: 2023-11-09_18:20:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.447666e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.634082e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.951326e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.875401e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.666103e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.969743e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.513708 sec - 2,149,338,807 cycles # 2.936 GHz - 3,363,855,189 instructions # 1.57 insn per cycle - 0.790810409 seconds time elapsed +TOTAL : 0.505509 sec + 2,155,710,329 cycles # 2.977 GHz + 3,399,528,814 instructions # 1.58 insn per cycle + 0.781726612 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --curhst WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.247364e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.319306e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.319306e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.331654e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.407717e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.407717e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.751713 sec - 14,161,394,696 cycles # 2.978 GHz - 38,393,782,229 instructions # 2.71 insn per cycle - 4.756965371 seconds time elapsed +TOTAL : 4.581584 sec + 14,155,354,915 cycles # 3.087 GHz + 38,394,211,404 instructions # 2.71 insn per cycle + 4.586893992 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.102956e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.519127e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.519127e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.232934e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.666455e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.666455e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.140784 sec - 6,476,072,518 cycles # 3.019 GHz - 15,828,662,766 instructions # 2.44 insn per cycle - 2.146087935 seconds time elapsed +TOTAL : 2.087317 sec + 6,475,857,503 cycles # 3.096 GHz + 15,829,568,301 instructions # 2.44 insn per cycle + 2.092497637 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.357298e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.077430e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.077430e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.589829e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.103388e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103388e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.205006 sec - 3,468,184,099 cycles # 2.868 GHz - 7,606,030,531 instructions # 2.19 insn per cycle - 1.210138102 seconds time elapsed +TOTAL : 1.175635 sec + 3,460,928,709 cycles # 2.933 GHz + 7,606,660,397 instructions # 2.20 insn per cycle + 1.180756657 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.559739e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.106426e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.106426e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.939659e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155071e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.155071e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.182909 sec - 3,252,386,286 cycles # 2.739 GHz - 7,215,128,616 instructions # 2.22 insn per cycle - 1.188234183 seconds time elapsed +TOTAL : 1.139254 sec + 3,252,781,739 cycles # 2.845 GHz + 7,214,861,555 instructions # 2.22 insn per cycle + 1.144377149 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.332938e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.163555e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.163555e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.585994e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.448568e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.448568e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.514986 sec - 3,076,222,583 cycles # 2.024 GHz - 5,845,646,643 instructions # 1.90 insn per cycle - 1.520503790 seconds time elapsed +TOTAL : 1.467081 sec + 3,063,258,508 cycles # 2.082 GHz + 5,845,738,451 instructions # 1.91 insn per cycle + 1.472345808 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index 65eed836f1..415792c712 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:42:11 +DATE: 2023-11-09_18:16:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.910755e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.623741e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.938668e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.158996e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.650796e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.951969e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.613295 sec - 2,456,965,302 cycles # 2.952 GHz - 3,803,211,416 instructions # 1.55 insn per cycle - 0.890835389 seconds time elapsed +TOTAL : 0.604748 sec + 2,477,150,875 cycles # 3.008 GHz + 3,827,452,997 instructions # 1.55 insn per cycle + 0.882802717 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/gcheck.exe -p 2048 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -79,14 +79,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.291712e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.365790e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.365790e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.278227e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.352095e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.352095e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.660750 sec - 14,151,818,953 cycles # 3.034 GHz - 38,392,284,342 instructions # 2.71 insn per cycle - 4.665929439 seconds time elapsed +TOTAL : 4.688006 sec + 14,149,964,703 cycles # 3.016 GHz + 38,393,052,805 instructions # 2.71 insn per cycle + 4.693060305 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 587) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest.exe @@ -106,14 +106,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.100691e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.531126e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.531126e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.195001e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.620625e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.620625e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.141262 sec - 6,484,613,456 cycles # 3.022 GHz - 15,829,197,800 instructions # 2.44 insn per cycle - 2.146554392 seconds time elapsed +TOTAL : 2.102860 sec + 6,473,914,859 cycles # 3.072 GHz + 15,829,595,595 instructions # 2.45 insn per cycle + 2.107994821 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2689) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest.exe @@ -133,14 +133,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.341999e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.073892e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.073892e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.498213e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092419e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092419e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.207094 sec - 3,469,517,910 cycles # 2.864 GHz - 7,605,958,162 instructions # 2.19 insn per cycle - 1.212334488 seconds time elapsed +TOTAL : 1.186261 sec + 3,464,671,010 cycles # 2.910 GHz + 7,606,636,115 instructions # 2.20 insn per cycle + 1.191422669 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3051) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.000164e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.163047e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.163047e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.018341e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.184021e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.184021e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.132933 sec - 3,264,238,503 cycles # 2.869 GHz - 7,214,964,009 instructions # 2.21 insn per cycle - 1.138315941 seconds time elapsed +TOTAL : 1.113056 sec + 3,253,634,801 cycles # 2.912 GHz + 7,214,825,947 instructions # 2.22 insn per cycle + 1.118242022 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2850) (512y: 23) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest.exe @@ -187,14 +187,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.339791e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.166023e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.166023e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.525206e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.371211e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.371211e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.514355 sec - 3,071,490,694 cycles # 2.022 GHz - 5,845,279,944 instructions # 1.90 insn per cycle - 1.519539150 seconds time elapsed +TOTAL : 1.477022 sec + 3,066,754,541 cycles # 2.070 GHz + 5,845,673,759 instructions # 1.91 insn per cycle + 1.482222084 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2364) (512y: 24) (512z: 1889) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 06d8f7d09d..dbd0c88759 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:04:48 +DATE: 2023-11-09_17:41:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.108032e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.751852e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.017010e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.062894e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.751636e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.032491e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.473084 sec - 2,025,626,323 cycles # 2.920 GHz - 2,923,341,053 instructions # 1.44 insn per cycle - 0.752440698 seconds time elapsed +TOTAL : 0.471370 sec + 2,069,916,039 cycles # 2.986 GHz + 2,893,797,319 instructions # 1.40 insn per cycle + 0.749930288 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.226197e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.296658e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.296658e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.241816e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.314075e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.314075e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.795639 sec - 14,422,319,778 cycles # 3.005 GHz - 39,889,404,210 instructions # 2.77 insn per cycle - 4.800761254 seconds time elapsed +TOTAL : 4.763197 sec + 14,419,363,408 cycles # 3.025 GHz + 39,885,822,805 instructions # 2.77 insn per cycle + 4.768145939 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 570) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.840353e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.410043e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.410043e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.077159e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.666017e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.666017e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.880181 sec - 5,610,891,745 cycles # 2.978 GHz - 15,305,908,167 instructions # 2.73 insn per cycle - 1.885354787 seconds time elapsed +TOTAL : 1.809175 sec + 5,591,744,554 cycles # 3.083 GHz + 15,300,029,522 instructions # 2.74 insn per cycle + 1.814409785 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2473) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.584020e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.270908e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.270908e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.801496e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.504366e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.504366e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.679496 sec - 4,739,407,479 cycles # 2.814 GHz - 9,752,382,085 instructions # 2.06 insn per cycle - 1.685063058 seconds time elapsed +TOTAL : 1.624464 sec + 4,741,141,330 cycles # 2.911 GHz + 9,747,661,132 instructions # 2.06 insn per cycle + 1.629561959 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3710) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.785300e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.495008e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.495008e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.005329e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.745480e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.745480e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.630325 sec - 4,628,420,386 cycles # 2.831 GHz - 9,343,264,044 instructions # 2.02 insn per cycle - 1.635531127 seconds time elapsed +TOTAL : 1.578447 sec + 4,623,271,493 cycles # 2.921 GHz + 9,339,033,786 instructions # 2.02 insn per cycle + 1.583594825 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.035393e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.577354e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.577354e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.210537e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.774289e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.774289e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.821625 sec - 3,652,061,133 cycles # 2.000 GHz - 7,049,331,376 instructions # 1.93 insn per cycle - 1.826875192 seconds time elapsed +TOTAL : 1.770504 sec + 3,648,791,259 cycles # 2.056 GHz + 7,045,498,641 instructions # 1.93 insn per cycle + 1.775670307 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2606) (512y: 12) (512z: 2221) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 430bbd2c8e..c0790b6e36 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:26:11 +DATE: 2023-11-09_18:00:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.386931e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.620878e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.939459e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.858794e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.673199e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.981057e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.478570 sec - 2,066,322,031 cycles # 2.937 GHz - 2,939,169,205 instructions # 1.42 insn per cycle - 0.760998289 seconds time elapsed +TOTAL : 0.474263 sec + 2,125,063,536 cycles # 3.002 GHz + 3,025,852,918 instructions # 1.42 insn per cycle + 0.764897313 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/gcheck.exe -p 2048 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 128 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.585659e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.679951e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.679951e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.589894e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.682971e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.682971e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.141107 sec - 12,606,870,018 cycles # 3.041 GHz - 34,392,677,682 instructions # 2.73 insn per cycle - 4.146310630 seconds time elapsed +TOTAL : 4.133418 sec + 12,609,458,975 cycles # 3.048 GHz + 34,395,001,210 instructions # 2.73 insn per cycle + 4.138439483 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 696) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.476247e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.957210e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.957210e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.435122e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.914251e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.914251e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.000613 sec - 6,098,731,252 cycles # 3.041 GHz - 14,873,462,613 instructions # 2.44 insn per cycle - 2.006051106 seconds time elapsed +TOTAL : 2.014773 sec + 6,085,710,075 cycles # 3.014 GHz + 14,874,327,590 instructions # 2.44 insn per cycle + 2.020198945 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3009) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.182448e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.992665e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.992665e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.550169e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.423492e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.423492e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.544245 sec - 4,326,302,580 cycles # 2.793 GHz - 9,041,454,033 instructions # 2.09 insn per cycle - 1.549495391 seconds time elapsed +TOTAL : 1.471458 sec + 4,290,277,982 cycles # 2.907 GHz + 9,041,954,393 instructions # 2.11 insn per cycle + 1.476543510 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4445) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.602793e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.504278e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.504278e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.705610e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.621776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.621776e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.462983 sec - 4,209,847,303 cycles # 2.868 GHz - 8,675,528,842 instructions # 2.06 insn per cycle - 1.468300337 seconds time elapsed +TOTAL : 1.443048 sec + 4,208,694,980 cycles # 2.909 GHz + 8,677,287,895 instructions # 2.06 insn per cycle + 1.448442097 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4244) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.697162e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.177263e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.177263e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.842247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.341676e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.341676e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.925379 sec - 3,842,178,645 cycles # 1.991 GHz - 7,819,452,293 instructions # 2.04 insn per cycle - 1.930845155 seconds time elapsed +TOTAL : 1.878702 sec + 3,847,091,668 cycles # 2.044 GHz + 7,820,914,226 instructions # 2.03 insn per cycle + 1.883936977 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4420) (512y: 0) (512z: 2556) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index c32244c33c..a8fdecb532 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:26:34 +DATE: 2023-11-09_18:01:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.460575e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.684792e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.012555e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.862525e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.715295e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.030318e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086718e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.478960 sec - 2,073,686,428 cycles # 2.952 GHz - 2,982,309,893 instructions # 1.44 insn per cycle - 0.760465246 seconds time elapsed +TOTAL : 0.474547 sec + 2,129,920,938 cycles # 3.015 GHz + 3,022,843,622 instructions # 1.42 insn per cycle + 0.763729431 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 127 @@ -77,14 +77,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.768420e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.879887e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.879887e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.720126e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.824009e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.824009e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 3.874951 sec - 11,759,850,982 cycles # 3.031 GHz - 35,129,174,459 instructions # 2.99 insn per cycle - 3.880406297 seconds time elapsed +TOTAL : 3.938814 sec + 11,787,930,920 cycles # 2.995 GHz + 35,134,515,128 instructions # 2.98 insn per cycle + 3.943783291 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 470) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.548911e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.058975e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.058975e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.688740e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.207831e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.207831e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.977553 sec - 5,960,287,184 cycles # 3.008 GHz - 14,484,169,544 instructions # 2.43 insn per cycle - 1.983134337 seconds time elapsed +TOTAL : 1.927645 sec + 5,955,477,747 cycles # 3.083 GHz + 14,483,875,890 instructions # 2.43 insn per cycle + 1.932605425 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.662372e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.600563e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.600563e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.792092e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.717382e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.717382e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.451994 sec - 4,186,509,528 cycles # 2.874 GHz - 8,887,826,504 instructions # 2.12 insn per cycle - 1.457581768 seconds time elapsed +TOTAL : 1.428222 sec + 4,172,426,658 cycles # 2.912 GHz + 8,888,638,577 instructions # 2.13 insn per cycle + 1.433579963 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3576) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.782199e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.721549e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.721549e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.830183e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.777369e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.777369e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.432127 sec - 4,128,776,992 cycles # 2.874 GHz - 8,424,271,434 instructions # 2.04 insn per cycle - 1.437420732 seconds time elapsed +TOTAL : 1.421326 sec + 4,143,555,691 cycles # 2.906 GHz + 8,424,122,393 instructions # 2.03 insn per cycle + 1.426420575 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3320) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.779314e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.273574e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.273574e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.911357e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.422090e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.422090e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.899022 sec - 3,798,792,191 cycles # 1.996 GHz - 7,712,429,012 instructions # 2.03 insn per cycle - 1.904382082 seconds time elapsed +TOTAL : 1.856974 sec + 3,783,077,119 cycles # 2.033 GHz + 7,713,045,733 instructions # 2.04 insn per cycle + 1.862087187 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3436) (512y: 0) (512z: 2108) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 4284e04c80..bc7d9de588 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:05:13 +DATE: 2023-11-09_17:41:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.262595e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.173145e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.266137e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.109904e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.171630e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.269382e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.516288 sec - 2,170,206,194 cycles # 2.914 GHz - 3,121,753,700 instructions # 1.44 insn per cycle - 0.802206987 seconds time elapsed +TOTAL : 0.511961 sec + 2,222,105,943 cycles # 3.001 GHz + 3,180,029,506 instructions # 1.43 insn per cycle + 0.797868325 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/gcheck.exe -p 2048 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 @@ -77,14 +77,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.129811e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.193121e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.193121e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.142732e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.204413e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.204413e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.027744 sec - 15,293,663,581 cycles # 3.040 GHz - 38,642,438,156 instructions # 2.53 insn per cycle - 5.032856601 seconds time elapsed +TOTAL : 4.999043 sec + 15,266,738,883 cycles # 3.052 GHz + 38,639,692,678 instructions # 2.53 insn per cycle + 5.004417103 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 672) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.666972e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.869148e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.869148e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.675686e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.874485e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.874485e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.964411 sec - 8,933,093,188 cycles # 3.009 GHz - 24,243,353,502 instructions # 2.71 insn per cycle - 2.969821465 seconds time elapsed +TOTAL : 2.956696 sec + 8,943,278,567 cycles # 3.020 GHz + 24,239,461,473 instructions # 2.71 insn per cycle + 2.961985342 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2188) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.660709e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.167400e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.167400e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.810568e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.309343e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.309343e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.961588 sec - 5,410,079,541 cycles # 2.752 GHz - 11,291,080,205 instructions # 2.09 insn per cycle - 1.966921243 seconds time elapsed +TOTAL : 1.907942 sec + 5,390,382,442 cycles # 2.818 GHz + 11,287,870,279 instructions # 2.09 insn per cycle + 1.913175131 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2480) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.588007e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.231756e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.231756e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.736389e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.412733e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.412733e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.695283 sec - 4,860,759,917 cycles # 2.859 GHz - 10,541,284,808 instructions # 2.17 insn per cycle - 1.700590360 seconds time elapsed +TOTAL : 1.660808 sec + 4,859,407,660 cycles # 2.918 GHz + 10,535,709,652 instructions # 2.17 insn per cycle + 1.666185530 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2167) (512y: 148) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.107588e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.350535e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.350535e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.170238e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.418556e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.418556e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.656629 sec - 5,204,386,075 cycles # 1.956 GHz - 7,617,502,706 instructions # 1.46 insn per cycle - 2.661905103 seconds time elapsed +TOTAL : 2.618902 sec + 5,253,729,468 cycles # 2.003 GHz + 7,613,729,309 instructions # 1.45 insn per cycle + 2.624316082 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1633) (512y: 126) (512z: 1608) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 58d2d743b0..008a5e172d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2023-11-03_19:05:40 +DATE: 2023-11-09_17:41:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.265506e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.176728e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270375e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.128890e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181968e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.279178e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.513169 sec - 2,175,922,923 cycles # 2.936 GHz - 3,154,957,492 instructions # 1.45 insn per cycle - 0.799013980 seconds time elapsed +TOTAL : 0.513214 sec + 2,219,973,022 cycles # 2.991 GHz + 3,202,428,118 instructions # 1.44 insn per cycle + 0.799522630 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/gcheck.exe -p 2048 256 1 WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 208 @@ -77,14 +77,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.110999e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.171227e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.171227e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.124085e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.184530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.184530e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.072155 sec - 15,377,556,110 cycles # 3.029 GHz - 40,435,905,161 instructions # 2.63 insn per cycle - 5.077406066 seconds time elapsed +TOTAL : 5.039921 sec + 15,384,037,518 cycles # 3.050 GHz + 40,433,132,851 instructions # 2.63 insn per cycle + 5.045085372 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 669) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest.exe @@ -104,14 +104,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.761885e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.974310e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.974310e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.855191e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.079392e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.079392e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.891901 sec - 8,516,736,770 cycles # 2.941 GHz - 23,273,421,536 instructions # 2.73 insn per cycle - 2.897134410 seconds time elapsed +TOTAL : 2.823965 sec + 8,503,215,845 cycles # 3.006 GHz + 23,269,764,862 instructions # 2.74 insn per cycle + 2.829223148 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 2091) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest.exe @@ -131,14 +131,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.041812e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.416387e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.416387e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.125017e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.510855e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.510855e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.184891 sec - 6,239,964,038 cycles # 2.850 GHz - 12,976,938,369 instructions # 2.08 insn per cycle - 2.190210603 seconds time elapsed +TOTAL : 2.149257 sec + 6,265,408,652 cycles # 2.910 GHz + 12,973,997,697 instructions # 2.07 insn per cycle + 2.154583439 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2669) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest.exe @@ -158,14 +158,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.262419e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.673980e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.673980e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.427179e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.860121e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.860121e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.097286 sec - 5,931,604,060 cycles # 2.822 GHz - 12,254,844,972 instructions # 2.07 insn per cycle - 2.102596228 seconds time elapsed +TOTAL : 2.035544 sec + 5,944,578,726 cycles # 2.915 GHz + 12,250,352,313 instructions # 2.06 insn per cycle + 2.040880399 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2209) (512y: 296) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest.exe @@ -185,14 +185,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.636806e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.830983e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.830983e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.896609e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.113493e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.113493e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.989274 sec - 5,599,763,733 cycles # 1.871 GHz - 8,758,209,944 instructions # 1.56 insn per cycle - 2.994808333 seconds time elapsed +TOTAL : 2.794321 sec + 5,604,210,205 cycles # 2.003 GHz + 8,753,670,387 instructions # 1.56 insn per cycle + 2.799501421 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1490) (512y: 183) (512z: 1909) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index c973ded005..a6a310dca7 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_19:06:08 +DATE: 2023-11-09_17:42:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.987778e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.047089e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.059978e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.987135e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.050792e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.063302e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.462314 sec - 1,969,733,176 cycles # 2.915 GHz - 2,854,417,454 instructions # 1.45 insn per cycle - 0.732902295 seconds time elapsed +TOTAL : 0.461215 sec + 2,013,982,440 cycles # 2.996 GHz + 2,888,271,641 instructions # 1.43 insn per cycle + 0.731639311 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.125374e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.318187e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329149e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.121271e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.323663e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.335167e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.595579 sec - 2,446,683,532 cycles # 2.952 GHz - 3,726,903,800 instructions # 1.52 insn per cycle - 0.888429467 seconds time elapsed +TOTAL : 0.596567 sec + 2,489,603,363 cycles # 2.997 GHz + 3,769,346,991 instructions # 1.51 insn per cycle + 0.890614911 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.543975e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.556543e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.556543e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.576698e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.589005e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.589005e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.463148 sec - 19,697,684,289 cycles # 3.046 GHz - 59,611,728,869 instructions # 3.03 insn per cycle - 6.467313414 seconds time elapsed +TOTAL : 6.380855 sec + 19,728,048,826 cycles # 3.090 GHz + 59,610,032,345 instructions # 3.02 insn per cycle + 6.384875624 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.806236e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.850408e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.850408e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.837473e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.882254e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.882254e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.430883 sec - 10,361,092,942 cycles # 3.017 GHz - 30,679,655,225 instructions # 2.96 insn per cycle - 3.435128458 seconds time elapsed +TOTAL : 3.409518 sec + 10,359,121,121 cycles # 3.036 GHz + 30,679,203,213 instructions # 2.96 insn per cycle + 3.413745701 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.723128e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.902993e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.902993e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.786469e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.964416e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.964416e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.707466 sec - 4,879,146,362 cycles # 2.851 GHz - 11,021,709,924 instructions # 2.26 insn per cycle - 1.711937944 seconds time elapsed +TOTAL : 1.696222 sec + 4,887,496,480 cycles # 2.875 GHz + 11,021,602,656 instructions # 2.26 insn per cycle + 1.700511665 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.083664e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.105516e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.105516e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.093744e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.115987e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.115987e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.533989 sec - 4,371,523,225 cycles # 2.843 GHz - 10,299,869,041 instructions # 2.36 insn per cycle - 1.538284203 seconds time elapsed +TOTAL : 1.520406 sec + 4,369,323,760 cycles # 2.867 GHz + 10,298,269,078 instructions # 2.36 insn per cycle + 1.524718704 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 
4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.583252e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.691167e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.691167e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.753883e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.865687e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.865687e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.184881 sec - 4,101,268,943 cycles # 1.874 GHz - 5,846,549,953 instructions # 1.43 insn per cycle - 2.189162148 seconds time elapsed +TOTAL : 2.137350 sec + 4,099,012,031 cycles # 1.915 GHz + 5,845,815,520 instructions # 1.43 insn per cycle + 2.141590310 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index cc88ce6db1..47e341807c 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_19:36:12 +DATE: 2023-11-09_18:10:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.617150e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.773641e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.773641e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.707712e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.862456e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.862456e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.490872 sec - 2,070,161,118 cycles # 2.946 GHz - 3,152,579,676 instructions # 1.52 insn per cycle - 0.759960652 seconds time elapsed +TOTAL : 0.491938 sec + 2,095,418,329 cycles # 2.943 GHz + 3,181,792,573 instructions # 1.52 insn per cycle + 0.771165711 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.687018e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.487518e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.487518e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.763222e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.617411e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.617411e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.832612 sec - 3,193,307,533 cycles # 2.947 GHz - 4,978,788,975 instructions # 1.56 insn per cycle - 1.143205796 seconds time elapsed +TOTAL : 0.818666 sec + 3,177,291,822 cycles # 2.975 GHz + 5,098,451,441 instructions # 1.60 insn per cycle + 1.129356217 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.529162e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.541866e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.541866e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.524248e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.536588e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.536588e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.507705 sec - 19,736,202,639 cycles # 3.031 GHz - 59,616,040,959 instructions # 3.02 insn per cycle - 6.512416242 seconds time elapsed +TOTAL : 6.519126 sec + 19,771,628,211 cycles # 3.032 GHz + 59,619,366,283 instructions # 3.02 insn per cycle + 6.523440391 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest.exe @@ -132,14 +132,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.815393e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.861165e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.861165e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.881918e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.927973e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.927973e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.431600 sec - 10,398,990,181 cycles # 3.027 GHz - 30,726,516,620 instructions # 2.95 insn per cycle - 3.436080496 seconds time elapsed +TOTAL : 3.385706 sec + 10,402,667,023 cycles # 3.069 GHz + 30,728,506,666 instructions # 2.95 insn per cycle + 3.390173573 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 5153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.253880e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.426870e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.426870e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.797699e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.978652e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.978652e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.802152 sec - 4,928,997,803 cycles # 2.730 GHz - 11,072,368,065 instructions # 2.25 insn per cycle - 1.806633331 seconds time elapsed +TOTAL : 1.701840 sec + 4,920,530,137 cycles # 2.885 GHz + 11,072,335,054 instructions # 2.25 insn per cycle + 1.706256708 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4467) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest.exe @@ -188,14 +188,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.076136e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.098656e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.098656e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.099458e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.122078e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.122078e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.553423 sec - 4,411,400,335 cycles # 2.833 GHz - 10,349,798,385 instructions # 2.35 insn per cycle - 1.557941492 seconds time elapsed +TOTAL : 1.518361 sec + 4,398,354,549 cycles # 2.890 GHz + 10,347,368,561 instructions # 2.35 insn per cycle + 1.522642923 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4137) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest.exe @@ -216,14 +216,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.266833e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.375233e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.375233e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.773044e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.885749e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.885749e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.287929 sec - 4,148,582,308 cycles # 1.811 GHz - 5,885,924,420 instructions # 1.42 insn per cycle - 2.292472050 seconds time elapsed +TOTAL : 2.139176 sec + 4,134,059,026 cycles # 1.929 GHz + 5,885,050,529 instructions # 1.42 insn per cycle + 2.143583199 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1540) (512y: 95) (512z: 3466) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 890a9e444f..de9a4f17b0 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_19:06:37 +DATE: 2023-11-09_17:42:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.934806e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.040123e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.052620e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.944811e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.043287e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.055886e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.460430 sec - 1,973,324,046 cycles # 2.928 GHz - 2,840,856,751 instructions # 1.44 insn per cycle - 0.731489352 seconds time elapsed +TOTAL : 0.460458 sec + 2,026,281,331 cycles # 3.005 GHz + 2,900,924,761 instructions # 1.43 insn per cycle + 0.731473724 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.120884e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.312101e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.322916e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.115492e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.315818e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.327216e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.593653 sec - 2,438,307,110 cycles # 2.956 GHz - 3,770,815,852 instructions # 1.55 insn per cycle - 0.884294118 seconds time elapsed +TOTAL : 0.589562 sec + 2,467,189,653 cycles # 3.006 GHz + 3,742,728,616 instructions # 1.52 insn per cycle + 0.882301885 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.568377e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.581048e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.581048e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.562701e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.575539e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.575539e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.401933 sec - 19,482,758,220 cycles # 3.042 GHz - 58,802,978,389 instructions # 3.02 insn per cycle - 6.406140471 seconds time elapsed +TOTAL : 6.415806 sec + 19,556,589,093 cycles # 3.047 GHz + 58,802,097,142 instructions # 3.01 insn per cycle + 6.419943255 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1313) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.917983e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.963815e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.963815e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.964793e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.010479e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.010479e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.353380 sec - 10,239,214,469 cycles # 3.050 GHz - 30,351,045,797 instructions # 2.96 insn per cycle - 3.357673213 seconds time elapsed +TOTAL : 3.321576 sec + 10,234,879,480 cycles # 3.078 GHz + 30,349,718,565 instructions # 2.97 insn per cycle + 3.325925546 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4970) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.402320e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.570383e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.570383e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.508412e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.675254e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.675254e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.764710 sec - 5,042,998,580 cycles # 2.852 GHz - 11,486,615,235 instructions # 2.28 insn per cycle - 1.768978894 seconds time elapsed +TOTAL : 1.744975 sec + 5,046,123,954 cycles # 2.887 GHz + 11,486,788,981 instructions # 2.28 insn per cycle + 1.749151834 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4591) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.003860e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.023445e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.023445e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.033659e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.053692e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.053692e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.654433 sec - 4,647,317,234 cycles # 2.803 GHz - 10,844,918,785 instructions # 2.33 insn per cycle - 1.658681615 seconds time elapsed +TOTAL : 1.606653 sec + 4,645,095,124 cycles # 2.885 GHz + 10,843,590,320 instructions # 2.33 insn per cycle + 1.610949978 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4183) (512y: 244) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.419133e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.526721e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.526721e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.741864e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.853507e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.853507e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.233568 sec - 4,119,227,015 cycles # 1.842 GHz - 6,111,995,104 instructions # 1.48 insn per cycle - 2.238507475 seconds time elapsed +TOTAL : 2.140995 sec + 4,112,867,345 cycles # 1.919 GHz + 6,110,383,002 instructions # 1.49 insn per cycle + 2.145162136 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1457) (512y: 139) (512z: 3568) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 906002ccef..f7b3cf47d9 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_19:07:06 +DATE: 2023-11-09_17:43:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.570718e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.332431e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.423909e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.559244e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.332615e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.416599e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.445719 sec - 1,977,839,409 cycles # 2.946 GHz - 2,766,831,818 instructions # 1.40 insn per cycle - 0.728762524 seconds time elapsed +TOTAL : 0.442703 sec + 1,956,548,432 cycles # 2.973 GHz + 2,743,818,395 instructions # 1.40 insn per cycle + 0.717328196 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 254 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.444258e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.461256e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.527187e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.415878e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.488188e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.558288e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.490311 sec - 2,098,277,441 cycles # 2.940 GHz - 3,050,395,563 instructions # 1.45 insn per cycle - 0.771282830 seconds time elapsed +TOTAL : 0.487204 sec + 2,131,239,677 cycles # 3.000 GHz + 3,082,245,234 instructions # 1.45 insn per cycle + 0.768130616 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -86,9 +86,9 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 32,139,063 cycles # 2.763 GHz - 49,369,582 instructions # 1.54 insn per cycle - 0.012019390 seconds time elapsed + 31,971,805 cycles # 2.811 GHz + 48,583,386 instructions # 1.52 insn per cycle + 0.011876482 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index afa8c22c25..e1663755b4 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_19:36:42 +DATE: 2023-11-09_18:11:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.935100e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.139273e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.139273e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.114759e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.213627e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.213627e+07 ) sec^-1 MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.458037 sec - 1,958,659,698 cycles # 2.936 GHz - 2,907,533,469 instructions # 1.48 insn per cycle - 0.726231579 seconds time elapsed +TOTAL : 0.453382 sec + 1,979,110,250 cycles # 2.985 GHz + 2,941,718,851 instructions # 1.49 insn per cycle + 0.719982475 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.639472e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.576828e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.576828e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.789515e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.657512e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.657512e+07 ) sec^-1 MeanMatrixElemValue = ( 6.737500e+02 +- 4.776370e+02 ) GeV^-2 -TOTAL : 0.638235 sec - 2,567,083,186 cycles # 2.951 GHz - 3,965,073,751 instructions # 1.54 insn per cycle - 0.927254467 seconds time elapsed +TOTAL : 0.632326 sec + 2,585,787,492 cycles # 3.000 GHz + 3,972,159,776 instructions # 1.54 insn per cycle + 0.920056111 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -99,9 +99,9 @@ OK (relative difference <= 5E-3) runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) - 38,813,158 cycles # 2.791 GHz - 52,008,055 instructions # 1.34 insn per cycle - 0.014463641 seconds time elapsed + 38,570,643 cycles # 2.885 GHz + 52,119,941 instructions # 1.35 insn per cycle + 0.013856202 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1034) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index e0c37ae81b..e8b37410be 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_19:07:15 +DATE: 2023-11-09_17:43:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.552711e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.312060e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.409477e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.567326e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.333824e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424930e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.443645 sec - 1,939,887,285 cycles # 2.958 GHz - 2,753,223,301 instructions # 1.42 insn per cycle - 0.713433638 seconds time elapsed +TOTAL : 0.444883 sec + 1,998,454,742 cycles # 2.980 GHz + 2,813,430,207 instructions # 1.41 insn per cycle + 0.728667460 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 248 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.420862e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.422248e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.487501e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.379215e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.422915e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.490315e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630099e+02 +- 4.770719e+02 ) GeV^-2 -TOTAL : 0.489840 sec - 2,095,642,051 cycles # 2.944 GHz - 3,058,032,700 instructions # 1.46 insn per cycle - 0.771189239 seconds time elapsed +TOTAL : 0.488237 sec + 2,124,585,750 cycles # 2.987 GHz + 3,077,258,575 instructions # 1.45 insn per cycle + 0.769041859 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -86,9 +86,9 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 31,454,006 cycles # 2.782 GHz - 48,514,001 instructions # 1.54 insn per cycle - 0.011695448 seconds time elapsed + 31,375,066 cycles # 2.814 GHz + 47,697,134 instructions # 1.52 insn per cycle + 0.011523392 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1029) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 9bd85e98d0..aa3d979423 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_19:07:25 +DATE: 2023-11-09_17:43:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.981637e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.050998e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.064107e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.974532e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.049892e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.062592e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.460239 sec - 1,991,164,692 cycles # 2.956 GHz - 2,861,513,835 instructions # 1.44 insn per cycle - 0.731121053 seconds time elapsed +TOTAL : 0.466235 sec + 1,982,451,794 cycles # 2.881 GHz + 2,904,128,689 instructions # 1.46 insn per cycle + 0.746029193 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.125939e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.318916e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329956e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.118841e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.320828e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.332362e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.595711 sec - 2,444,157,832 cycles # 2.957 GHz - 3,696,457,333 instructions # 1.51 insn per cycle - 0.888026518 seconds time elapsed +TOTAL : 0.602851 sec + 2,418,002,144 cycles # 2.873 GHz + 3,684,858,061 instructions # 1.52 insn per cycle + 0.899181828 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -86,9 +86,9 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 35,021,922 cycles # 2.756 GHz - 50,809,631 instructions # 1.45 insn per cycle - 0.013111359 seconds time elapsed + 34,749,440 cycles # 2.771 GHz + 50,090,467 instructions # 1.44 insn per cycle + 0.013126058 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1399) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index 659836495f..fa1b7c54dc 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2023-11-03_19:07:34 +DATE: 2023-11-09_17:43:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.948465e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.041856e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.054410e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.943854e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.040668e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.053565e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.460434 sec - 1,981,925,545 cycles # 2.941 GHz - 2,855,578,890 instructions # 1.44 insn per cycle - 0.731466835 seconds time elapsed +TOTAL : 0.466224 sec + 1,967,855,817 cycles # 2.845 GHz + 2,813,069,845 instructions # 1.43 insn per cycle + 0.750240924 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.114794e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.303596e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.314294e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.108193e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.305249e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.316509e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.592739 sec - 2,423,209,817 cycles # 2.940 GHz - 3,698,114,761 instructions # 1.53 insn per cycle - 0.885260737 seconds time elapsed +TOTAL : 0.594738 sec + 2,473,036,884 cycles # 2.994 GHz + 3,768,499,475 instructions # 1.52 insn per cycle + 0.886746599 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -86,9 +86,9 @@ OK (relative difference <= 5E-3) ========================================================================= runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 34,542,827 cycles # 2.778 GHz - 50,097,141 instructions # 1.45 insn per cycle - 0.012808089 seconds time elapsed + 34,257,253 cycles # 2.793 GHz + 49,140,913 instructions # 1.43 insn per cycle + 0.012667194 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 1276) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index a9f9e7f9b0..5de2ca45d8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:07:44 +DATE: 2023-11-09_17:43:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.471280e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.495513e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.497667e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.498898e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.522792e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.525024e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.521778 sec - 2,221,753,731 cycles # 2.953 GHz - 3,509,979,793 instructions # 1.58 insn per cycle - 0.811888374 seconds time elapsed +TOTAL : 0.521868 sec + 2,246,075,293 cycles # 2.975 GHz + 3,415,991,617 instructions # 1.52 insn per cycle + 0.815510814 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.130694e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.157314e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.158457e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.122388e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.150135e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.151328e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.024926 sec - 9,877,023,451 cycles # 3.016 GHz - 20,938,621,148 instructions # 2.12 insn per cycle - 3.332222792 seconds time elapsed +TOTAL : 3.026853 sec + 9,913,864,058 cycles # 3.024 GHz + 22,195,735,281 instructions # 2.24 insn per cycle + 3.335346642 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.942881e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.943811e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.943811e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.927075e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.927983e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.927983e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.450914 sec - 25,661,004,969 cycles # 3.035 GHz - 78,943,064,293 instructions # 3.08 insn per cycle - 8.455241133 seconds time elapsed +TOTAL : 8.520375 sec + 25,675,362,415 cycles # 3.013 GHz + 78,943,710,554 instructions # 3.07 insn per cycle + 8.524455360 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.566286e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.569647e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.569647e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.557363e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.560585e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560585e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.607952 sec - 12,925,846,736 cycles # 2.803 GHz - 39,287,875,718 instructions # 3.04 insn per cycle - 4.612260028 seconds time elapsed +TOTAL : 4.619361 sec + 12,935,854,234 cycles # 2.798 GHz + 39,286,025,399 instructions # 3.04 insn per cycle + 4.623706542 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.376392e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.393376e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.393376e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.091948e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.108522e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.108522e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.967322 sec - 5,576,808,906 cycles # 2.829 GHz - 13,690,679,702 instructions # 2.45 insn per cycle - 1.971661788 seconds time elapsed +TOTAL : 2.036281 sec + 5,584,766,890 cycles # 2.738 GHz + 13,690,141,249 instructions # 2.45 insn per cycle + 2.040702440 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.568825e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.591271e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.591271e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.675809e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.698948e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.698948e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.723570 sec - 4,897,962,779 cycles # 2.836 GHz - 12,345,795,320 instructions # 2.52 insn per cycle - 1.727906957 seconds time elapsed +TOTAL : 1.704074 sec + 4,897,181,740 cycles # 2.868 GHz + 12,344,518,245 instructions # 2.52 insn per cycle + 1.708309061 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.463403e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.476893e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.476893e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.632146e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.645889e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.645889e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.207008 sec - 4,113,706,051 cycles # 1.861 GHz - 6,338,446,257 instructions # 1.54 insn per cycle - 2.211395304 seconds time elapsed +TOTAL : 2.158505 sec + 4,118,735,499 cycles # 1.905 GHz + 6,336,932,858 instructions # 1.54 insn per cycle + 2.162776211 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 05b9b7b471..322fb0150d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:37:26 +DATE: 2023-11-09_18:12:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.138586e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.475297e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.475297e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.165662e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.477249e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.477249e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.514369 sec - 2,174,774,169 cycles # 2.935 GHz - 3,408,753,270 instructions # 1.57 insn per cycle - 0.802511668 seconds time elapsed +TOTAL : 0.512387 sec + 2,201,868,575 cycles # 2.980 GHz + 3,430,381,187 instructions # 1.56 insn per cycle + 0.801238529 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.635405e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.119639e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.119639e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.642632e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.111769e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.111769e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.311178 sec - 10,730,531,324 cycles # 2.994 GHz - 24,179,707,994 instructions # 2.25 insn per cycle - 3.640277810 seconds time elapsed +TOTAL : 3.299595 sec + 10,919,109,000 cycles # 3.053 GHz + 24,319,272,982 instructions # 2.23 insn per cycle + 3.633626468 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.906612e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.907549e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.907549e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.957325e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.958258e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.958258e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.615680 sec - 25,666,310,685 cycles # 2.978 GHz - 78,949,148,944 instructions # 3.08 insn per cycle - 8.620265583 seconds time elapsed +TOTAL : 8.392728 sec + 25,662,881,797 cycles # 3.059 GHz + 78,952,840,684 instructions # 3.08 insn per cycle + 8.396994023 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -132,14 +132,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.685334e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.688850e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.688850e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.730470e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.733980e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.733980e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.463406 sec - 12,942,626,026 cycles # 2.897 GHz - 39,297,696,719 instructions # 3.04 insn per cycle - 4.468216686 seconds time elapsed +TOTAL : 4.409754 sec + 12,949,002,647 cycles # 2.934 GHz + 39,297,510,156 instructions # 3.03 insn per cycle + 4.414215325 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.403877e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.422097e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.422097e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.533999e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.551780e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.551780e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.965161 sec - 5,597,716,321 cycles # 2.843 GHz - 13,700,115,311 instructions # 2.45 insn per cycle - 1.969720229 seconds time elapsed +TOTAL : 1.934795 sec + 5,595,375,698 cycles # 2.886 GHz + 13,699,668,832 instructions # 2.45 insn per cycle + 1.939106700 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -188,14 +188,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.573549e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.596918e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.596918e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.706839e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.728905e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.728905e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.726627 sec - 4,910,197,742 cycles # 2.838 GHz - 12,354,930,161 instructions # 2.52 insn per cycle - 1.731069519 seconds time elapsed +TOTAL : 1.702912 sec + 4,912,481,885 cycles # 2.879 GHz + 12,355,076,796 instructions # 2.52 insn per cycle + 1.707414472 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -216,14 +216,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.408369e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.421923e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.421923e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.525002e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.540499e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.540499e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.227463 sec - 4,132,274,023 cycles # 1.852 GHz - 6,348,232,709 instructions # 1.54 insn per cycle - 2.231941444 seconds time elapsed +TOTAL : 2.193518 sec + 4,132,016,890 cycles # 1.881 GHz + 6,348,500,069 instructions # 1.54 insn per cycle + 2.198089448 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2: 1821) (512y: 102) (512z: 9375) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index d4a13c45dc..4e138ec032 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:49:13 +DATE: 2023-11-09_18:23:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.490628e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.519771e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.522013e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.485315e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.511617e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.513675e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.506263 sec - 2,193,209,541 cycles # 2.934 GHz - 3,448,112,270 instructions # 1.57 insn per cycle - 0.811794626 seconds time elapsed +TOTAL : 0.505341 sec + 2,219,350,607 cycles # 2.986 GHz + 3,460,374,619 instructions # 1.56 insn per cycle + 0.811034575 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --common WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.140777e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.174961e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.176419e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.144642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.176791e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.178152e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.133332 sec - 10,144,803,574 cycles # 2.992 GHz - 22,979,164,856 instructions # 2.27 insn per cycle - 3.446699997 seconds time elapsed +TOTAL : 3.133190 sec + 10,226,911,008 cycles # 3.021 GHz + 21,462,701,558 instructions # 2.10 insn per cycle + 3.444111151 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.934897e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.935823e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.935823e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.962376e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.963339e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.963339e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.487397 sec - 25,642,144,633 cycles # 3.020 GHz - 78,942,503,354 instructions # 3.08 insn per cycle - 8.491509185 seconds time elapsed +TOTAL : 8.368138 sec + 25,660,792,563 cycles # 3.066 GHz + 78,945,591,899 instructions # 3.08 insn per cycle + 8.372166508 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.604711e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.608085e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.608085e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.725556e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.729176e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.729176e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.560510 sec - 12,949,935,406 cycles # 2.841 GHz - 39,287,959,625 instructions # 3.03 insn per cycle - 4.564590789 seconds time elapsed +TOTAL : 4.413013 sec + 12,940,530,582 cycles # 2.932 GHz + 39,286,713,275 instructions # 3.04 insn per cycle + 4.417069788 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.331820e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.349574e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.349574e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.541485e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.558659e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.558659e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.979581 sec - 5,585,242,942 cycles # 2.817 GHz - 13,688,645,923 instructions # 2.45 insn per cycle - 1.983846301 seconds time elapsed +TOTAL : 1.930984 sec + 5,584,027,716 cycles # 2.887 GHz + 13,688,917,418 instructions # 2.45 insn per cycle + 1.935195895 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.501909e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.523734e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.523734e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.785385e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.808420e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.808420e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.737131 sec - 4,904,473,574 cycles # 2.818 GHz - 12,343,066,066 instructions # 2.52 insn per cycle - 1.741373569 seconds time elapsed +TOTAL : 1.686810 sec + 4,897,782,017 cycles # 2.898 GHz + 12,342,341,736 instructions # 2.52 insn per cycle + 1.690859675 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.326865e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.339889e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.339889e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.578298e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.591405e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.591405e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.249568 sec - 4,122,823,033 cycles # 1.830 GHz - 6,335,244,526 instructions # 1.54 insn per cycle - 2.253741280 seconds time elapsed +TOTAL : 2.175070 sec + 4,121,604,366 cycles # 1.892 GHz + 6,334,904,963 instructions # 1.54 insn per cycle + 2.179001381 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index 8a019b9732..a5bd4bb577 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:45:52 +DATE: 2023-11-09_18:20:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.497991e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.525524e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.527678e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.495033e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.521313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.523414e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.505150 sec - 2,198,803,568 cycles # 2.954 GHz - 3,469,496,289 instructions # 1.58 insn per cycle - 0.812740673 seconds time elapsed +TOTAL : 0.502389 sec + 2,234,960,295 cycles # 3.014 GHz + 3,501,182,478 instructions # 1.57 insn per cycle + 0.813908762 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.149366e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.183697e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.185208e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.146228e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.178481e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.179832e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.069067 sec - 9,961,450,693 cycles # 3.001 GHz - 22,775,488,914 instructions # 2.29 insn per cycle - 3.378594275 seconds time elapsed +TOTAL : 3.070610 sec + 10,014,430,488 cycles # 3.015 GHz + 23,183,698,994 instructions # 2.32 insn per cycle + 3.378407946 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.919154e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.920062e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.920062e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.972782e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.973730e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.973730e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.555088 sec - 25,630,164,257 cycles # 2.995 GHz - 78,942,698,347 instructions # 3.08 insn per cycle - 8.559388166 seconds time elapsed +TOTAL : 8.322572 sec + 25,630,767,892 cycles # 3.079 GHz + 78,944,418,555 instructions # 3.08 insn per cycle + 8.326671797 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.673575e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.677034e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.677034e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.718928e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.722195e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.722195e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.474572 sec - 12,938,774,287 cycles # 2.890 GHz - 39,284,863,862 instructions # 3.04 insn per cycle - 4.478882140 seconds time elapsed +TOTAL : 4.419054 sec + 12,933,087,616 cycles # 2.925 GHz + 39,284,437,808 instructions # 3.04 insn per cycle + 4.423270824 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.365364e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.382422e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.382422e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.554509e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.572221e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.572221e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.970072 sec - 5,585,160,191 cycles # 2.830 GHz - 13,689,327,859 instructions # 2.45 insn per cycle - 1.974279626 seconds time elapsed +TOTAL : 1.926889 sec + 5,576,123,810 cycles # 2.889 GHz + 13,689,166,422 instructions # 2.45 insn per cycle + 1.931047296 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.573694e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.596726e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.596726e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.729620e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.752389e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.752389e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.722482 sec - 4,895,075,879 cycles # 2.836 GHz - 12,344,411,096 instructions # 2.52 insn per cycle - 1.726704102 seconds time elapsed +TOTAL : 1.694985 sec + 4,901,721,494 cycles # 2.886 GHz + 12,344,869,251 instructions # 2.52 insn per cycle + 1.699075447 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.342892e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.356180e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.356180e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.451359e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.465184e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.465184e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.243467 sec - 4,145,301,834 cycles # 1.845 GHz - 6,337,134,423 instructions # 1.53 insn per cycle - 2.247770943 seconds time elapsed +TOTAL : 2.210683 sec + 4,119,158,466 cycles # 1.861 GHz + 6,337,202,754 instructions # 1.54 insn per cycle + 2.214903970 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 0761c0d014..e1894928b5 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:42:34 +DATE: 2023-11-09_18:17:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.224877e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.534029e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.536870e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.185134e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.497070e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.499968e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.509685 sec - 2,194,677,974 cycles # 2.952 GHz - 3,468,699,947 instructions # 1.58 insn per cycle - 0.805522488 seconds time elapsed +TOTAL : 0.512712 sec + 2,117,085,337 cycles # 2.853 GHz + 3,348,083,687 instructions # 1.58 insn per cycle + 0.802067553 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -71,14 +71,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.741528e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.176834e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.178277e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.746826e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.178822e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.180194e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.196085 sec - 10,332,938,289 cycles # 2.993 GHz - 23,233,171,839 instructions # 2.25 insn per cycle - 3.511259911 seconds time elapsed +TOTAL : 3.195850 sec + 10,403,522,722 cycles # 3.010 GHz + 22,812,003,731 instructions # 2.19 insn per cycle + 3.513623861 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -94,14 +94,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.927835e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.928807e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.928807e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.978212e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.979161e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.979161e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.516575 sec - 25,626,746,874 cycles # 3.008 GHz - 78,942,783,638 instructions # 3.08 insn per cycle - 8.520860421 seconds time elapsed +TOTAL : 8.300221 sec + 25,643,059,514 cycles # 3.089 GHz + 78,945,101,648 instructions # 3.08 insn per cycle + 8.304495187 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4892) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.674456e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.677849e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.677849e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.720030e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.723443e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.723443e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.472834 sec - 12,938,647,402 cycles # 2.891 GHz - 39,285,558,550 instructions # 3.04 insn per cycle - 4.477166946 seconds time elapsed +TOTAL : 4.417696 sec + 12,936,090,694 cycles # 2.926 GHz + 39,285,549,332 instructions # 3.04 insn per cycle + 4.421886330 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13182) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest.exe @@ -148,14 +148,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.290335e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.307469e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.307469e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.467679e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.484549e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.484549e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.987857 sec - 5,582,015,296 cycles # 2.804 GHz - 13,690,066,849 instructions # 2.45 insn per cycle - 1.992149312 seconds time elapsed +TOTAL : 1.946291 sec + 5,575,526,782 cycles # 2.860 GHz + 13,689,232,963 instructions # 2.46 insn per cycle + 1.950526745 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11357) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest.exe @@ -175,14 +175,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.537627e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.561759e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.561759e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.714029e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.737204e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.737204e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.729438 sec - 4,899,116,746 cycles # 2.827 GHz - 12,344,356,410 instructions # 2.52 insn per cycle - 1.733854664 seconds time elapsed +TOTAL : 1.697675 sec + 4,893,869,630 cycles # 2.877 GHz + 12,345,121,576 instructions # 2.52 insn per cycle + 1.701906664 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10266) (512y: 88) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest.exe @@ -202,14 +202,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.331605e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.345774e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.345774e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.624620e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.638794e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.638794e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.247519 sec - 4,126,377,191 cycles # 1.833 GHz - 6,337,288,668 instructions # 1.54 insn per cycle - 2.251874954 seconds time elapsed +TOTAL : 2.160421 sec + 4,114,771,943 cycles # 1.902 GHz + 6,336,936,596 instructions # 1.54 insn per cycle + 2.164683207 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1821) (512y: 102) (512z: 9375) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index d519ec18af..d9a60f4c2d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:08:21 +DATE: 2023-11-09_17:44:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.482135e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.509267e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.511176e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.474117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.499523e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.501625e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.519206 sec - 2,212,325,201 cycles # 2.954 GHz - 3,433,704,735 instructions # 1.55 insn per cycle - 0.807580904 seconds time elapsed +TOTAL : 0.521490 sec + 2,250,098,032 cycles # 2.995 GHz + 3,547,618,625 instructions # 1.58 insn per cycle + 0.811334512 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.159162e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.186085e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.187240e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.144032e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.172097e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.173315e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.004869 sec - 9,812,463,662 cycles # 3.013 GHz - 21,581,231,713 instructions # 2.20 insn per cycle - 3.312573877 seconds time elapsed +TOTAL : 3.014234 sec + 9,779,736,194 cycles # 2.987 GHz + 19,303,224,180 instructions # 1.97 insn per cycle + 3.330161859 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.947345e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.948277e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.948277e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.971538e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.972487e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.972487e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.431137 sec - 25,590,035,480 cycles # 3.034 GHz - 78,715,048,416 instructions # 3.08 insn per cycle - 8.435307792 seconds time elapsed +TOTAL : 8.327938 sec + 25,611,620,219 cycles # 3.074 GHz + 78,715,429,796 instructions # 3.07 insn per cycle + 8.332111280 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4263) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.620452e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.623805e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.623805e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.709838e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.713193e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.713193e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.539871 sec - 12,909,848,042 cycles # 2.843 GHz - 39,233,023,972 instructions # 3.04 insn per cycle - 4.544176080 seconds time elapsed +TOTAL : 4.429736 sec + 12,908,947,595 cycles # 2.912 GHz + 39,230,824,629 instructions # 3.04 insn per cycle + 4.433832156 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:12949) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.331174e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.348654e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.348654e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.184366e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.200734e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.200734e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.977747 sec - 5,618,064,764 cycles # 2.836 GHz - 13,804,762,963 instructions # 2.46 insn per cycle - 1.981982814 seconds time elapsed +TOTAL : 2.013363 sec + 5,615,451,412 cycles # 2.785 GHz + 13,804,151,174 instructions # 2.46 insn per cycle + 2.017493867 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11422) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.463129e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.484771e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.484771e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.496512e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.518383e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.518383e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.742192 sec - 4,960,747,667 cycles # 2.842 GHz - 12,470,817,922 instructions # 2.51 insn per cycle - 1.746604551 seconds time elapsed +TOTAL : 1.736002 sec + 4,961,501,370 cycles # 2.852 GHz + 12,469,539,646 instructions # 2.51 insn per cycle + 1.740286680 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10258) (512y: 240) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.427183e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.440655e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.440655e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.549305e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.563023e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.563023e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.217977 sec - 4,119,292,054 cycles # 1.855 GHz - 6,462,314,928 instructions # 1.57 insn per cycle - 2.222289185 seconds time elapsed +TOTAL : 2.181875 sec + 4,116,495,870 cycles # 1.884 GHz + 6,461,064,172 instructions # 1.57 insn per cycle + 2.186117492 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1647) (512y: 192) (512z: 9375) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 0e734b6c9d..909bf4e735 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:26:58 +DATE: 2023-11-09_18:01:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.237666e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.262462e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.264647e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.239370e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.263076e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.265061e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.533653 sec - 2,219,666,724 cycles # 2.910 GHz - 3,445,153,040 instructions # 1.55 insn per cycle - 0.821091738 seconds time elapsed +TOTAL : 0.531588 sec + 2,281,405,083 cycles # 2.976 GHz + 3,558,676,633 instructions # 1.56 insn per cycle + 0.825879944 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.775197e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.803191e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.804422e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.775154e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.802017e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.803118e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.300230 sec - 10,634,484,052 cycles # 2.991 GHz - 23,844,861,281 instructions # 2.24 insn per cycle - 3.611693691 seconds time elapsed +TOTAL : 3.293832 sec + 10,794,008,612 cycles # 3.043 GHz + 23,569,569,961 instructions # 2.18 insn per cycle + 3.607202529 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.361422e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.361903e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.361903e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.420862e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.421336e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.421336e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.613350 sec - 113,653,626,732 cycles # 3.022 GHz - 144,966,182,806 instructions # 1.28 insn per cycle - 37.617592948 seconds time elapsed +TOTAL : 37.106861 sec + 113,630,776,289 cycles # 3.063 GHz + 144,980,863,935 instructions # 1.28 insn per cycle + 37.110990461 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:21605) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.197160e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.199710e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.199710e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.245783e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.248348e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.248348e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.138561 sec - 14,751,525,638 cycles # 2.870 GHz - 37,578,516,323 instructions # 2.55 insn per cycle - 5.143061031 seconds time elapsed +TOTAL : 5.061979 sec + 14,717,920,983 cycles # 2.906 GHz + 37,577,837,464 instructions # 2.55 insn per cycle + 5.066177833 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68118) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.662015e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.676566e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.676566e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.791579e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.806069e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.806069e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.150367 sec - 6,125,090,080 cycles # 2.844 GHz - 13,063,740,704 instructions # 2.13 insn per cycle - 2.154679772 seconds time elapsed +TOTAL : 2.114146 sec + 6,120,754,225 cycles # 2.890 GHz + 13,063,521,271 instructions # 2.13 insn per cycle + 2.118343855 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:46960) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.263953e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.285040e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.285040e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.380050e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.401402e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.401402e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.780016 sec - 5,060,160,878 cycles # 2.837 GHz - 11,442,229,361 instructions # 2.26 insn per cycle - 1.784487029 seconds time elapsed +TOTAL : 1.757697 sec + 5,060,306,566 cycles # 2.873 GHz + 11,442,262,844 instructions # 2.26 insn per cycle + 1.761841609 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:40434) (512y: 285) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.515689e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.530167e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.530167e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.755291e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.769173e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.769173e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.192230 sec - 3,982,582,654 cycles # 1.814 GHz - 5,943,874,364 instructions # 1.49 insn per cycle - 2.196624515 seconds time elapsed +TOTAL : 2.124539 sec + 3,983,245,523 cycles # 1.872 GHz + 5,944,184,553 instructions # 1.49 insn per cycle + 2.128814459 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2: 2455) (512y: 337) (512z:39411) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index a431669edb..8be167a2b3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:28:07 +DATE: 2023-11-09_18:02:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.227099e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.252215e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.254306e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.258787e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.282651e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.285304e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.530677 sec - 2,254,800,400 cycles # 2.956 GHz - 3,541,881,168 instructions # 1.57 insn per cycle - 0.819833622 seconds time elapsed +TOTAL : 0.525087 sec + 2,271,033,028 cycles # 3.019 GHz + 3,503,626,972 instructions # 1.54 insn per cycle + 0.810303022 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.792463e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.821318e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.822521e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.795218e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.822430e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.823559e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.276536 sec - 10,598,798,874 cycles # 3.001 GHz - 22,505,546,793 instructions # 2.12 insn per cycle - 3.590880872 seconds time elapsed +TOTAL : 3.267432 sec + 10,775,752,309 cycles # 3.062 GHz + 23,804,895,620 instructions # 2.21 insn per cycle + 3.575584891 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.316847e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.317310e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.317310e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.382161e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.382658e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.382658e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 38.002712 sec - 114,613,209,494 cycles # 3.016 GHz - 145,560,103,749 instructions # 1.27 insn per cycle - 38.007069023 seconds time elapsed +TOTAL : 37.434460 sec + 114,573,902,263 cycles # 3.060 GHz + 145,559,795,063 instructions # 1.27 insn per cycle + 37.438717752 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:22248) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.101440e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.103871e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.103871e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.172461e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.174968e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.174968e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.297737 sec - 15,180,958,119 cycles # 2.864 GHz - 37,765,704,407 instructions # 2.49 insn per cycle - 5.302092232 seconds time elapsed +TOTAL : 5.178309 sec + 15,150,664,399 cycles # 2.924 GHz + 37,765,142,558 instructions # 2.49 insn per cycle + 5.182585019 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:68446) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.750289e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.764988e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.764988e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.899691e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.915108e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.915108e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.125646 sec - 6,006,519,083 cycles # 2.821 GHz - 12,897,926,690 instructions # 2.15 insn per cycle - 2.130039886 seconds time elapsed +TOTAL : 2.085123 sec + 6,007,372,451 cycles # 2.876 GHz + 12,897,891,125 instructions # 2.15 insn per cycle + 2.089322243 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:45929) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.134516e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.155464e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.155464e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.290925e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.312116e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.312116e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.805195 sec - 5,111,264,978 cycles # 2.826 GHz - 11,448,660,091 instructions # 2.24 insn per cycle - 1.809562076 seconds time elapsed +TOTAL : 1.774574 sec + 5,109,183,395 cycles # 2.874 GHz + 11,448,665,866 instructions # 2.24 insn per cycle + 1.778819443 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:40123) (512y: 219) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.713307e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.727980e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.727980e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.900466e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.915540e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.915540e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.136153 sec - 3,956,606,945 cycles # 1.850 GHz - 5,898,384,643 instructions # 1.49 insn per cycle - 2.140545061 seconds time elapsed +TOTAL : 2.085227 sec + 3,957,731,000 cycles # 1.895 GHz + 5,897,967,734 instructions # 1.49 insn per cycle + 2.089481596 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2: 1971) (512y: 259) (512z:38937) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 389fe370ef..24e6fadbe8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:08:57 +DATE: 2023-11-09_17:45:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.330449e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.375316e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.385679e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.337209e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.383457e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.391632e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.478801 sec - 2,034,971,060 cycles # 2.940 GHz - 3,054,212,240 instructions # 1.50 insn per cycle - 0.749375620 seconds time elapsed +TOTAL : 0.480161 sec + 2,056,195,749 cycles # 2.969 GHz + 3,041,501,171 instructions # 1.48 insn per cycle + 0.751973888 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.529589e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.587136e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.589764e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.613057e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.675362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.678111e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.723184 sec - 5,782,983,871 cycles # 2.964 GHz - 12,066,403,823 instructions # 2.09 insn per cycle - 2.008243733 seconds time elapsed +TOTAL : 1.713246 sec + 5,908,983,228 cycles # 3.045 GHz + 11,684,311,184 instructions # 1.98 insn per cycle + 1.997404675 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.003677e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.004662e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.004662e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.054709e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.055772e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.055772e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.193664 sec - 24,655,416,435 cycles # 3.008 GHz - 78,134,412,275 instructions # 3.17 insn per cycle - 8.197717930 seconds time elapsed +TOTAL : 7.990756 sec + 24,645,365,645 cycles # 3.083 GHz + 78,136,702,059 instructions # 3.17 insn per cycle + 7.994878538 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.270897e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.285143e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.285143e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.432830e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.446994e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.446994e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.263632 sec - 6,475,526,341 cycles # 2.856 GHz - 20,124,982,632 instructions # 3.11 insn per cycle - 2.267936828 seconds time elapsed +TOTAL : 2.213938 sec + 6,478,911,538 cycles # 2.922 GHz + 20,124,199,414 instructions # 3.11 insn per cycle + 2.218115274 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.655891e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.662862e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.662862e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.680617e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.687674e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.687674e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.998679 sec - 2,840,454,971 cycles # 2.834 GHz - 6,992,590,525 instructions # 2.46 insn per cycle - 1.002898964 seconds time elapsed +TOTAL : 0.983889 sec + 2,838,821,051 cycles # 2.875 GHz + 6,991,598,423 instructions # 2.46 insn per cycle + 0.988065526 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.904708e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.914180e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.914180e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.841366e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.850029e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.850029e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.868982 sec - 2,491,374,231 cycles # 2.855 GHz - 6,299,681,276 instructions # 2.53 insn per cycle - 0.873227215 seconds time elapsed +TOTAL : 0.898688 sec + 2,488,990,380 cycles # 2.759 GHz + 6,298,918,188 instructions # 2.53 insn per cycle + 0.902843603 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.509691e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.515612e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.515612e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.538961e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.547910e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.547910e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.094413 sec - 2,048,957,877 cycles # 1.866 GHz - 3,269,073,408 instructions # 1.60 insn per cycle - 1.098654820 seconds time elapsed +TOTAL : 1.073829 sec + 2,048,858,820 cycles # 1.904 GHz + 3,269,526,835 instructions # 1.60 insn per cycle + 1.078196054 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 5a5ccf0962..741b2db05e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:38:03 +DATE: 2023-11-09_18:12:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.621379e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.322960e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.322960e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.661835e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.358766e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.358766e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.467719 sec - 2,022,012,389 cycles # 2.930 GHz - 3,029,595,627 instructions # 1.50 insn per cycle - 0.748028952 seconds time elapsed +TOTAL : 0.465492 sec + 2,015,187,483 cycles # 2.973 GHz + 3,002,049,942 instructions # 1.49 insn per cycle + 0.734544576 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.232227e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.472561e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.472561e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.271779e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.483162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.483162e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.900347 sec - 6,375,786,665 cycles # 2.982 GHz - 13,373,135,596 instructions # 2.10 insn per cycle - 2.195039568 seconds time elapsed +TOTAL : 1.878261 sec + 6,418,416,087 cycles # 3.037 GHz + 13,442,701,753 instructions # 2.09 insn per cycle + 2.169965161 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.008350e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.009347e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.009347e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.022346e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.023320e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.023320e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.176665 sec - 24,649,325,474 cycles # 3.013 GHz - 78,138,045,806 instructions # 3.17 insn per cycle - 8.180908705 seconds time elapsed +TOTAL : 8.120106 sec + 24,656,495,142 cycles # 3.035 GHz + 78,138,532,268 instructions # 3.17 insn per cycle + 8.124268827 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -132,14 +132,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.326247e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.339746e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.339746e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.385899e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.400170e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.400170e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.249404 sec - 6,483,421,678 cycles # 2.878 GHz - 20,133,640,820 instructions # 3.11 insn per cycle - 2.253658931 seconds time elapsed +TOTAL : 2.230708 sec + 6,485,115,953 cycles # 2.903 GHz + 20,133,634,822 instructions # 3.10 insn per cycle + 2.234788671 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.657895e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.664866e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.664866e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.666755e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.673825e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.673825e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.999874 sec - 2,846,897,865 cycles # 2.837 GHz - 7,001,448,108 instructions # 2.46 insn per cycle - 1.004235579 seconds time elapsed +TOTAL : 0.994493 sec + 2,844,577,237 cycles # 2.850 GHz + 7,001,609,472 instructions # 2.46 insn per cycle + 0.998731395 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -188,14 +188,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.899947e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.909346e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.909346e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.867923e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.876610e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.876610e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.873710 sec - 2,498,501,131 cycles # 2.848 GHz - 6,308,536,459 instructions # 2.52 insn per cycle - 0.877964105 seconds time elapsed +TOTAL : 0.888528 sec + 2,499,243,226 cycles # 2.802 GHz + 6,308,730,841 instructions # 2.52 insn per cycle + 0.892798888 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -216,14 +216,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.494285e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.499863e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.499863e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.495920e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.501735e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.501735e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.108704 sec - 2,059,473,334 cycles # 1.852 GHz - 3,279,338,884 instructions # 1.59 insn per cycle - 1.113120539 seconds time elapsed +TOTAL : 1.107724 sec + 2,056,932,102 cycles # 1.850 GHz + 3,279,291,488 instructions # 1.59 insn per cycle + 1.112281401 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 12ad22d5a3..341f303aae 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:49:50 +DATE: 2023-11-09_18:24:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.340393e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.392051e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.397944e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.311526e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.361390e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.366448e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159397e-01 +- 3.238804e-01 ) GeV^-4 -TOTAL : 0.462195 sec - 1,986,930,742 cycles # 2.947 GHz - 3,005,964,493 instructions # 1.51 insn per cycle - 0.730831332 seconds time elapsed +TOTAL : 0.464707 sec + 2,008,078,996 cycles # 2.985 GHz + 3,036,723,964 instructions # 1.51 insn per cycle + 0.732085987 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --common WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.547500e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.620827e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.624055e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.547836e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.616999e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.620197e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.798053 sec - 6,062,916,752 cycles # 2.993 GHz - 11,569,516,184 instructions # 1.91 insn per cycle - 2.082278895 seconds time elapsed +TOTAL : 1.809045 sec + 6,020,726,960 cycles # 2.958 GHz + 11,569,273,710 instructions # 1.92 insn per cycle + 2.092173630 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.005661e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.006690e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.006690e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.048605e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.049604e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.049604e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.186459 sec - 24,671,953,454 cycles # 3.013 GHz - 78,137,621,710 instructions # 3.17 insn per cycle - 8.190517160 seconds time elapsed +TOTAL : 8.015102 sec + 24,651,277,493 cycles # 3.074 GHz + 78,133,763,667 instructions # 3.17 insn per cycle + 8.018994302 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.107458e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.120841e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.120841e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.377691e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.391250e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.391250e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.317106 sec - 6,488,771,451 cycles # 2.796 GHz - 20,124,539,496 instructions # 3.10 insn per cycle - 2.321142527 seconds time elapsed +TOTAL : 2.232285 sec + 6,481,088,653 cycles # 2.899 GHz + 20,124,382,938 instructions # 3.11 insn per cycle + 2.236275849 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.647793e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.654673e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.654673e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.686029e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.693351e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.693351e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 1.005506 sec - 2,843,966,049 cycles # 2.818 GHz - 6,991,496,346 instructions # 2.46 insn per cycle - 1.009548479 seconds time elapsed +TOTAL : 0.981416 sec + 2,838,446,580 cycles # 2.882 GHz + 6,989,000,726 instructions # 2.46 insn per cycle + 0.985356553 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.895349e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.904605e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.904605e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.921238e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.930307e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.930307e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.875040 sec - 2,495,845,822 cycles # 2.841 GHz - 6,297,369,404 instructions # 2.52 insn per cycle - 0.879134455 seconds time elapsed +TOTAL : 0.863261 sec + 2,495,681,706 cycles # 2.880 GHz + 6,297,112,783 instructions # 2.52 insn per cycle + 0.867346097 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.504042e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.510113e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.510113e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.544822e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.550907e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.550907e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.099941 sec - 2,050,409,457 cycles # 1.858 GHz - 3,265,015,309 instructions # 1.59 insn per cycle - 1.104007255 seconds time elapsed +TOTAL : 1.070627 sec + 2,048,550,465 cycles # 1.908 GHz + 3,265,201,106 instructions # 1.59 insn per cycle + 1.074629445 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 5b13ff9774..63178ad027 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:46:29 +DATE: 2023-11-09_18:21:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.339869e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.391844e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.397472e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.362546e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.415600e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.420893e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.461224 sec - 1,973,907,563 cycles # 2.940 GHz - 2,969,869,707 instructions # 1.50 insn per cycle - 0.729741448 seconds time elapsed +TOTAL : 0.460752 sec + 2,005,673,830 cycles # 2.989 GHz + 2,996,841,960 instructions # 1.49 insn per cycle + 0.729795900 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --curhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.563612e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.637504e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.640751e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.567426e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.636917e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.639909e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.749654 sec - 5,928,680,592 cycles # 2.999 GHz - 12,893,930,524 instructions # 2.17 insn per cycle - 2.033490620 seconds time elapsed +TOTAL : 1.748848 sec + 5,960,134,043 cycles # 3.018 GHz + 12,821,096,532 instructions # 2.15 insn per cycle + 2.031326515 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.014770e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.015759e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.015759e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.057810e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.058811e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.058811e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.147957 sec - 24,635,567,678 cycles # 3.022 GHz - 78,133,891,626 instructions # 3.17 insn per cycle - 8.152140443 seconds time elapsed +TOTAL : 7.977700 sec + 24,629,048,089 cycles # 3.086 GHz + 78,132,914,520 instructions # 3.17 insn per cycle + 7.981637101 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.062428e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.074909e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.074909e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.439696e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.453635e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.453635e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.330015 sec - 6,475,827,642 cycles # 2.775 GHz - 20,124,634,132 instructions # 3.11 insn per cycle - 2.334037311 seconds time elapsed +TOTAL : 2.212183 sec + 6,477,339,632 cycles # 2.924 GHz + 20,124,428,604 instructions # 3.11 insn per cycle + 2.216339188 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.595519e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.602006e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.602006e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.594939e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.601395e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.601395e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.036160 sec - 2,838,919,957 cycles # 2.730 GHz - 6,991,694,320 instructions # 2.46 insn per cycle - 1.040335460 seconds time elapsed +TOTAL : 1.036317 sec + 2,842,114,214 cycles # 2.733 GHz + 6,991,999,004 instructions # 2.46 insn per cycle + 1.040742925 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.893954e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.903085e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.903085e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.922697e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.931896e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.931896e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.873924 sec - 2,489,283,092 cycles # 2.837 GHz - 6,298,948,511 instructions # 2.53 insn per cycle - 0.878050091 seconds time elapsed +TOTAL : 0.860619 sec + 2,490,053,798 cycles # 2.883 GHz + 6,298,956,842 instructions # 2.53 insn per cycle + 0.864591382 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.497242e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.502884e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.502884e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.526848e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.532542e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.532542e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.103482 sec - 2,049,248,209 cycles # 1.852 GHz - 3,268,952,113 instructions # 1.60 insn per cycle - 1.107551558 seconds time elapsed +TOTAL : 1.082468 sec + 2,049,657,294 cycles # 1.888 GHz + 3,269,097,732 instructions # 1.59 insn per cycle + 1.086487061 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index cdb252ac3a..2548057249 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:43:11 +DATE: 2023-11-09_18:17:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -51,14 +51,14 @@ WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.764175e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.406414e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.411755e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.733376e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.369757e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.375069e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.466281 sec - 1,989,064,547 cycles # 2.930 GHz - 3,017,212,928 instructions # 1.52 insn per cycle - 0.737783039 seconds time elapsed +TOTAL : 0.461931 sec + 2,018,026,618 cycles # 3.001 GHz + 3,012,517,263 instructions # 1.49 insn per cycle + 0.729223266 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --rmbhst WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost @@ -71,14 +71,14 @@ WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.472408e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.626435e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.629621e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.494168e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.614081e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.617046e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641710e+00 +- 4.994249e+00 ) GeV^-4 -TOTAL : 1.825252 sec - 6,129,357,136 cycles # 2.985 GHz - 13,024,512,874 instructions # 2.12 insn per cycle - 2.110041533 seconds time elapsed +TOTAL : 1.820293 sec + 6,248,771,087 cycles # 3.054 GHz + 13,452,131,003 instructions # 2.15 insn per cycle + 2.111868581 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -94,14 +94,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.017146e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.018188e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.018188e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.046417e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.047425e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.047425e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.138680 sec - 24,636,857,889 cycles # 3.027 GHz - 78,136,646,989 instructions # 3.17 insn per cycle - 8.142807331 seconds time elapsed +TOTAL : 8.022116 sec + 24,641,165,344 cycles # 3.070 GHz + 78,133,947,271 instructions # 3.17 insn per cycle + 8.026095295 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3602) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest.exe @@ -121,14 +121,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.266088e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.280126e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.280126e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.418320e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.431601e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.431601e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.265043 sec - 6,477,387,096 cycles # 2.855 GHz - 20,124,193,083 instructions # 3.11 insn per cycle - 2.269259910 seconds time elapsed +TOTAL : 2.218818 sec + 6,476,858,939 cycles # 2.915 GHz + 20,124,080,031 instructions # 3.11 insn per cycle + 2.222978465 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13763) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest.exe @@ -148,14 +148,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.644884e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.651718e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.651718e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.673631e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.680333e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.680333e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.005131 sec - 2,839,448,871 cycles # 2.816 GHz - 6,991,884,623 instructions # 2.46 insn per cycle - 1.009345557 seconds time elapsed +TOTAL : 0.987621 sec + 2,839,487,470 cycles # 2.865 GHz + 6,991,564,753 instructions # 2.46 insn per cycle + 0.991606693 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest.exe @@ -175,14 +175,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.866159e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.874920e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.874920e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.883610e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.892494e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.892494e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.887083 sec - 2,489,977,422 cycles # 2.796 GHz - 6,298,695,060 instructions # 2.53 insn per cycle - 0.891225776 seconds time elapsed +TOTAL : 0.878347 sec + 2,488,399,526 cycles # 2.822 GHz + 6,298,882,599 instructions # 2.53 insn per cycle + 0.882234875 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10822) (512y: 43) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest.exe @@ -202,14 +202,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.498745e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.504407e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.504407e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.534627e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.540393e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.540393e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.102380 sec - 2,046,697,565 cycles # 1.851 GHz - 3,268,682,926 instructions # 1.60 insn per cycle - 1.106464577 seconds time elapsed +TOTAL : 1.076545 sec + 2,047,724,498 cycles # 1.897 GHz + 3,268,770,442 instructions # 1.60 insn per cycle + 1.080450235 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2415) (512y: 46) (512z: 9571) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index 9fe77f3bb4..3e46ada377 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:09:27 +DATE: 2023-11-09_17:45:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.327293e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.373619e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.378917e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.305671e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.350688e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.358120e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.480093 sec - 2,046,992,398 cycles # 2.957 GHz - 3,008,261,627 instructions # 1.47 insn per cycle - 0.750577809 seconds time elapsed +TOTAL : 0.483459 sec + 2,029,909,968 cycles # 2.855 GHz + 2,962,980,745 instructions # 1.46 insn per cycle + 0.768081643 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.515177e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.572348e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.574911e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.574581e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.636147e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.638743e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.715757 sec - 5,871,373,370 cycles # 3.006 GHz - 12,204,738,560 instructions # 2.08 insn per cycle - 2.009775672 seconds time elapsed +TOTAL : 1.716632 sec + 5,921,965,881 cycles # 3.044 GHz + 11,852,981,757 instructions # 2.00 insn per cycle + 2.001901523 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.026797e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.027818e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.027818e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.062728e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.063773e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.063773e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.100053 sec - 24,563,227,881 cycles # 3.031 GHz - 77,860,200,084 instructions # 3.17 insn per cycle - 8.104232064 seconds time elapsed +TOTAL : 7.958572 sec + 24,559,190,224 cycles # 3.085 GHz + 77,859,989,303 instructions # 3.17 insn per cycle + 7.962642501 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3113) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.430084e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.444359e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.444359e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.583566e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.598037e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.598037e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.215968 sec - 6,421,588,621 cycles # 2.894 GHz - 20,090,220,099 instructions # 3.13 insn per cycle - 2.220335001 seconds time elapsed +TOTAL : 2.170856 sec + 6,426,627,449 cycles # 2.956 GHz + 20,090,039,565 instructions # 3.13 insn per cycle + 2.175014616 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.625861e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.632520e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.632520e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.591188e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.597484e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.597484e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.016598 sec - 2,906,571,537 cycles # 2.849 GHz - 7,134,546,428 instructions # 2.45 insn per cycle - 1.020819368 seconds time elapsed +TOTAL : 1.038604 sec + 2,902,688,212 cycles # 2.785 GHz + 7,133,529,057 instructions # 2.46 insn per cycle + 1.042821386 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:12261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.810175e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.818358e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.818358e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.840190e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.848739e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.848739e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.914087 sec - 2,595,791,217 cycles # 2.828 GHz - 6,442,852,611 instructions # 2.48 insn per cycle - 0.918452804 seconds time elapsed +TOTAL : 0.898885 sec + 2,595,883,470 cycles # 2.877 GHz + 6,441,979,586 instructions # 2.48 insn per cycle + 0.902832877 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11276) (512y: 27) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.453251e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.458727e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.458727e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.492137e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.497778e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.497778e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.136514 sec - 2,124,554,510 cycles # 1.864 GHz - 3,431,456,558 instructions # 1.62 insn per cycle - 1.140688320 seconds time elapsed +TOTAL : 1.106744 sec + 2,123,250,955 cycles # 1.918 GHz + 3,431,574,417 instructions # 1.62 insn per cycle + 1.110853762 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2912) (512y: 22) (512z: 9647) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 6d22eac4d2..764181f824 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:29:17 +DATE: 2023-11-09_18:03:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.584275e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.627587e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.631963e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.601175e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.638676e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.643535e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.489018 sec - 2,081,067,674 cycles # 2.934 GHz - 3,133,776,802 instructions # 1.51 insn per cycle - 0.771988427 seconds time elapsed +TOTAL : 0.484907 sec + 2,101,276,759 cycles # 2.981 GHz + 3,149,706,582 instructions # 1.50 insn per cycle + 0.766736785 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.747350e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.808169e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.810857e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.695736e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.752372e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.754868e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.853996 sec - 6,275,481,753 cycles # 3.001 GHz - 12,514,155,894 instructions # 1.99 insn per cycle - 2.147936222 seconds time elapsed +TOTAL : 1.853224 sec + 6,303,125,801 cycles # 3.016 GHz + 12,982,819,660 instructions # 2.06 insn per cycle + 2.146815240 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.644036e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.644860e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.644860e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.841033e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.841866e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.841866e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 29.065327 sec - 87,424,924,787 cycles # 3.008 GHz - 135,567,300,472 instructions # 1.55 insn per cycle - 29.069446346 seconds time elapsed +TOTAL : 28.085663 sec + 86,167,672,431 cycles # 3.068 GHz + 135,565,357,772 instructions # 1.57 insn per cycle + 28.089696347 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:15486) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.026233e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.038857e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.038857e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.152037e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.164422e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.164422e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.342565 sec - 6,786,587,363 cycles # 2.893 GHz - 19,387,387,931 instructions # 2.86 insn per cycle - 2.346831164 seconds time elapsed +TOTAL : 2.302124 sec + 6,785,316,910 cycles # 2.944 GHz + 19,388,398,647 instructions # 2.86 insn per cycle + 2.306338036 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69680) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.459444e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.464900e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.464900e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.500496e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.506041e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.506041e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.132478 sec - 3,179,013,562 cycles # 2.798 GHz - 6,809,043,401 instructions # 2.14 insn per cycle - 1.136902959 seconds time elapsed +TOTAL : 1.100781 sec + 3,177,227,261 cycles # 2.877 GHz + 6,808,813,623 instructions # 2.14 insn per cycle + 1.104867562 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:49077) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.738168e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.745907e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.745907e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.797362e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.805452e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.805452e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.952016 sec - 2,651,392,730 cycles # 2.774 GHz - 5,987,188,755 instructions # 2.26 insn per cycle - 0.956397839 seconds time elapsed +TOTAL : 0.920545 sec + 2,652,149,170 cycles # 2.870 GHz + 5,986,924,086 instructions # 2.26 insn per cycle + 0.924698406 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:42677) (512y: 11) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.472802e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.478184e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.478184e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.476030e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.481355e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.481355e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.121995 sec - 2,073,738,270 cycles # 1.843 GHz - 3,501,511,021 instructions # 1.69 insn per cycle - 1.126283052 seconds time elapsed +TOTAL : 1.119541 sec + 2,077,679,044 cycles # 1.851 GHz + 3,501,921,791 instructions # 1.69 insn per cycle + 1.123804705 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 5198) (512y: 3) (512z:44822) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 5c9ad24a46..7b7c373ccc 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl1_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:30:09 +DATE: 2023-11-09_18:04:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.558233e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.598421e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.603327e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.541471e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.579175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.583358e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.487345 sec - 2,076,063,570 cycles # 2.928 GHz - 3,124,474,063 instructions # 1.50 insn per cycle - 0.769324674 seconds time elapsed +TOTAL : 0.484837 sec + 2,105,287,248 cycles # 2.990 GHz + 3,132,361,933 instructions # 1.49 insn per cycle + 0.765772342 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.647182e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.706650e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.709351e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.694480e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.751016e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.753615e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.865548 sec - 6,314,327,402 cycles # 2.992 GHz - 13,540,816,282 instructions # 2.14 insn per cycle - 2.170188129 seconds time elapsed +TOTAL : 1.853183 sec + 6,341,276,975 cycles # 3.036 GHz + 13,434,801,047 instructions # 2.12 insn per cycle + 2.144878674 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.736423e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.737265e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.737265e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.834166e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.834994e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.834994e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 28.597065 sec - 86,035,998,776 cycles # 3.009 GHz - 135,911,265,736 instructions # 1.58 insn per cycle - 28.601145029 seconds time elapsed +TOTAL : 28.118822 sec + 86,081,697,198 cycles # 3.062 GHz + 135,906,074,576 instructions # 1.58 insn per cycle + 28.122852922 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:15910) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.976771e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.989628e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.989628e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.132688e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.145964e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.145964e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.358802 sec - 6,848,676,061 cycles # 2.899 GHz - 19,439,456,701 instructions # 2.84 insn per cycle - 2.362995374 seconds time elapsed +TOTAL : 2.306989 sec + 6,845,463,882 cycles # 2.963 GHz + 19,440,308,006 instructions # 2.84 insn per cycle + 2.311118522 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:69722) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.510619e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.516450e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.516450e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.544215e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.549994e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.549994e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 1.093889 sec - 3,110,977,160 cycles # 2.835 GHz - 6,719,869,092 instructions # 2.16 insn per cycle - 1.098127483 seconds time elapsed +TOTAL : 1.069611 sec + 3,120,065,313 cycles # 2.908 GHz + 6,719,636,670 instructions # 2.15 insn per cycle + 1.073683656 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:47667) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.794946e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.802956e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.802956e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.829756e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.837937e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.837937e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367376e+00 ) GeV^-4 -TOTAL : 0.922821 sec - 2,627,235,427 cycles # 2.838 GHz - 5,970,250,488 instructions # 2.27 insn per cycle - 0.926978795 seconds time elapsed +TOTAL : 0.904097 sec + 2,625,695,846 cycles # 2.892 GHz + 5,970,269,399 instructions # 2.27 insn per cycle + 0.908318447 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:41842) (512y: 13) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.483560e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.489106e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.489106e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.517896e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.523661e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.523661e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060904e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.114177 sec - 2,080,137,201 cycles # 1.861 GHz - 3,494,948,543 instructions # 1.68 insn per cycle - 1.118521627 seconds time elapsed +TOTAL : 1.088404 sec + 2,079,379,564 cycles # 1.905 GHz + 3,494,888,851 instructions # 1.68 insn per cycle + 1.092417864 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 4162) (512y: 4) (512z:44465) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index b38c13fcd9..93a0b75f12 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:09:56 +DATE: 2023-11-09_17:46:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.468828e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.491770e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.493892e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.470867e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.494695e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.496793e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.519876 sec - 2,215,127,737 cycles # 2.957 GHz - 3,487,212,374 instructions # 1.57 insn per cycle - 0.807913712 seconds time elapsed +TOTAL : 0.522613 sec + 2,231,403,620 cycles # 2.972 GHz + 3,427,736,994 instructions # 1.54 insn per cycle + 0.812895260 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.135164e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.161799e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.162966e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.127962e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.155846e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.156998e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.028303 sec - 9,769,796,186 cycles # 2.979 GHz - 22,335,132,843 instructions # 2.29 insn per cycle - 3.336784998 seconds time elapsed +TOTAL : 3.024603 sec + 10,040,286,484 cycles # 3.065 GHz + 20,701,312,854 instructions # 2.06 insn per cycle + 3.332453984 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.912244e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.913140e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.913140e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.954833e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.955774e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.955774e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.586099 sec - 25,914,180,302 cycles # 3.017 GHz - 79,445,505,152 instructions # 3.07 insn per cycle - 8.590406292 seconds time elapsed +TOTAL : 8.399305 sec + 25,922,061,106 cycles # 3.085 GHz + 79,443,494,538 instructions # 3.06 insn per cycle + 8.403427486 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4857) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.695684e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.699049e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.699049e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.761504e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765123e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765123e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.447189 sec - 12,656,450,439 cycles # 2.844 GHz - 38,554,825,829 instructions # 3.05 insn per cycle - 4.451478069 seconds time elapsed +TOTAL : 4.369308 sec + 12,659,894,478 cycles # 2.895 GHz + 38,554,080,405 instructions # 3.05 insn per cycle + 4.373596593 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:13161) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.537952e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.556620e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.556620e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.648175e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.665781e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.665781e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.930375 sec - 5,512,214,802 cycles # 2.850 GHz - 13,486,265,307 instructions # 2.45 insn per cycle - 1.934770358 seconds time elapsed +TOTAL : 1.905268 sec + 5,516,001,376 cycles # 2.890 GHz + 13,483,921,346 instructions # 2.44 insn per cycle + 1.909531551 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11242) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.638550e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.660856e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.660856e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.803935e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.827738e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.827738e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.711054 sec - 4,872,445,248 cycles # 2.842 GHz - 12,141,983,198 instructions # 2.49 insn per cycle - 1.715434660 seconds time elapsed +TOTAL : 1.682277 sec + 4,871,353,432 cycles # 2.890 GHz + 12,140,803,788 instructions # 2.49 insn per cycle + 1.686455915 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10154) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.406789e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.420159e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.420159e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.374652e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.387771e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.387771e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.223975 sec - 4,144,217,356 cycles # 1.862 GHz - 6,340,578,545 instructions # 1.53 insn per cycle - 2.228285470 seconds time elapsed +TOTAL : 2.233464 sec + 4,145,054,475 cycles # 1.853 GHz + 6,339,255,297 instructions # 1.53 insn per cycle + 2.237809120 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1802) (512y: 93) (512z: 9358) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index 46f37c0a90..5c4ca592f3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2023-11-03_19:10:33 +DATE: 2023-11-09_17:46:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.484364e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.507714e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.509764e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.487617e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.512149e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.514706e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.519968 sec - 2,216,873,411 cycles # 2.952 GHz - 3,459,675,597 instructions # 1.56 insn per cycle - 0.809738739 seconds time elapsed +TOTAL : 0.518884 sec + 2,241,934,817 cycles # 2.999 GHz + 3,518,298,272 instructions # 1.57 insn per cycle + 0.808606683 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.134555e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.161246e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.162402e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.131184e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.159088e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.160252e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.016486 sec - 9,822,814,204 cycles # 3.004 GHz - 22,339,986,571 instructions # 2.27 insn per cycle - 3.325238208 seconds time elapsed +TOTAL : 3.016798 sec + 10,040,228,896 cycles # 3.072 GHz + 22,037,859,926 instructions # 2.19 insn per cycle + 3.324922224 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.909809e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.910727e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.910727e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.950722e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.951656e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.951656e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.597381 sec - 25,939,435,501 cycles # 3.017 GHz - 79,457,351,519 instructions # 3.06 insn per cycle - 8.601657625 seconds time elapsed +TOTAL : 8.416183 sec + 25,916,224,646 cycles # 3.078 GHz + 79,453,865,963 instructions # 3.07 insn per cycle + 8.420247127 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 4504) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.664829e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.668218e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.668218e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.759672e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.763188e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.763188e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.484461 sec - 12,651,418,370 cycles # 2.819 GHz - 38,525,727,884 instructions # 3.05 insn per cycle - 4.488762135 seconds time elapsed +TOTAL : 4.371398 sec + 12,639,801,464 cycles # 2.889 GHz + 38,524,761,271 instructions # 3.05 insn per cycle + 4.375560053 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:12928) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.385701e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.404187e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.404187e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.630529e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.648410e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.648410e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.965077 sec - 5,557,225,506 cycles # 2.823 GHz - 13,610,780,927 instructions # 2.45 insn per cycle - 1.969439061 seconds time elapsed +TOTAL : 1.909613 sec + 5,559,227,570 cycles # 2.906 GHz + 13,609,303,550 instructions # 2.45 insn per cycle + 1.913823155 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:11327) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 9.328216e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.349743e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.349743e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.332740e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.353313e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.353313e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.767465 sec - 4,920,931,185 cycles # 2.779 GHz - 12,278,542,674 instructions # 2.50 insn per cycle - 1.771926617 seconds time elapsed +TOTAL : 1.766447 sec + 4,917,170,589 cycles # 2.778 GHz + 12,276,136,667 instructions # 2.50 insn per cycle + 1.770689432 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:10143) (512y: 239) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.389874e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.403004e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.403004e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.605174e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.618655e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.618655e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.228912 sec - 4,146,930,402 cycles # 1.858 GHz - 6,446,453,346 instructions # 1.55 insn per cycle - 2.233245374 seconds time elapsed +TOTAL : 2.166306 sec + 4,144,641,386 cycles # 1.911 GHz + 6,445,298,096 instructions # 1.56 insn per cycle + 2.170508580 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1627) (512y: 191) (512z: 9356) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 2048a9698e..b73b517066 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_19:12:52 +DATE: 2023-11-09_17:49:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.071850e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.072225e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.072335e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.070656e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.071067e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.071174e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.421447 sec - 8,245,731,454 cycles # 3.012 GHz - 18,688,279,165 instructions # 2.27 insn per cycle - 2.797097094 seconds time elapsed +TOTAL : 2.421343 sec + 8,332,807,450 cycles # 3.040 GHz + 16,939,230,243 instructions # 2.03 insn per cycle + 2.799270804 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.261920e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.263777e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.264034e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.271200e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.273122e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.273304e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.993277 sec - 12,924,149,664 cycles # 2.993 GHz - 29,920,520,122 instructions # 2.32 insn per cycle - 4.373104302 seconds time elapsed +TOTAL : 3.985063 sec + 13,247,174,015 cycles # 3.069 GHz + 30,019,215,878 instructions # 2.27 insn per cycle + 4.374841890 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.414546e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.414780e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.414780e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.228283e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.228511e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.228511e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.278557 sec - 18,784,400,880 cycles # 2.990 GHz - 53,915,743,321 instructions # 2.87 insn per cycle - 6.282578284 seconds time elapsed +TOTAL : 6.424546 sec + 18,798,364,918 cycles # 2.925 GHz + 53,916,162,526 instructions # 2.87 insn per cycle + 6.428517349 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.622225e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.622313e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.622313e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.657858e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.657947e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.657947e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.260349 sec - 9,843,353,366 cycles # 3.016 GHz - 27,093,120,012 instructions # 2.75 insn per cycle - 3.264542212 seconds time elapsed +TOTAL : 3.191098 sec + 9,844,225,763 cycles # 3.082 GHz + 27,092,778,504 instructions # 2.75 insn per cycle + 3.195159677 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.543297e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.543763e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.543763e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.638511e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.638939e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.638939e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.494911 sec - 4,247,565,583 cycles # 2.835 GHz - 9,561,660,282 instructions # 2.25 insn per cycle - 1.498994646 seconds time elapsed +TOTAL : 1.457101 sec + 4,229,207,978 cycles # 2.896 GHz + 9,561,222,824 instructions # 2.26 insn per cycle + 1.461220413 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.041064e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.041630e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.041630e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.119963e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.120507e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.120507e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.312043 sec - 3,711,873,932 cycles # 2.822 GHz - 8,485,580,977 instructions # 2.29 insn per cycle - 1.316064551 seconds time elapsed +TOTAL : 1.286739 sec + 3,714,427,423 cycles # 2.879 GHz + 8,485,272,385 instructions # 2.28 insn per cycle + 1.290826596 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.655846e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.656376e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.656376e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.600399e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.600911e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.600911e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.450066 sec - 2,692,078,825 cycles # 1.852 GHz - 4,273,245,565 instructions # 1.59 insn per cycle - 1.454158841 seconds time elapsed +TOTAL : 1.474924 sec + 2,695,875,361 cycles # 1.824 GHz + 4,273,169,567 instructions # 1.59 insn per cycle + 1.479057981 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index fbbae31086..28081b2160 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_19:38:33 +DATE: 2023-11-09_18:13:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.071334e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.072304e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.072304e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.064318e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.065254e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.065254e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.376547 sec - 8,066,576,997 cycles # 2.992 GHz - 17,224,378,863 instructions # 2.14 insn per cycle - 2.753340167 seconds time elapsed +TOTAL : 2.361712 sec + 8,164,385,199 cycles # 3.041 GHz + 16,942,565,052 instructions # 2.08 insn per cycle + 2.743176660 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.219956e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.252584e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.252584e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.190361e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.223459e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.223459e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.983566 sec - 12,755,700,095 cycles # 2.969 GHz - 26,780,853,821 instructions # 2.10 insn per cycle - 4.362214203 seconds time elapsed +TOTAL : 3.988233 sec + 13,123,079,634 cycles # 3.036 GHz + 28,841,455,416 instructions # 2.20 insn per cycle + 4.378479494 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.520548e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.520796e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.520796e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.307342e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.307565e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.307565e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.203566 sec - 18,829,459,475 cycles # 3.034 GHz - 53,915,868,697 instructions # 2.86 insn per cycle - 6.207586404 seconds time elapsed +TOTAL : 6.364568 sec + 18,927,213,544 cycles # 2.973 GHz + 53,918,164,087 instructions # 2.85 insn per cycle + 6.368577598 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32447) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest.exe @@ -132,14 +132,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.632618e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.632708e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.632708e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.666025e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.666114e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.666114e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.239187 sec - 9,805,468,555 cycles # 3.024 GHz - 27,094,086,958 instructions # 2.76 insn per cycle - 3.243245202 seconds time elapsed +TOTAL : 3.173683 sec + 9,797,609,023 cycles # 3.084 GHz + 27,093,782,808 instructions # 2.77 insn per cycle + 3.177702749 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96441) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.541893e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.542348e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.542348e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.255906e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.256265e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.256265e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.496042 sec - 4,247,154,617 cycles # 2.833 GHz - 9,562,315,517 instructions # 2.25 insn per cycle - 1.500165545 seconds time elapsed +TOTAL : 1.626420 sec + 4,592,212,308 cycles # 2.818 GHz + 9,562,781,549 instructions # 2.08 insn per cycle + 1.630448393 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest.exe @@ -188,14 +188,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.062512e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.063083e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.063083e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.133405e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.134023e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.134023e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.305609 sec - 3,707,362,205 cycles # 2.832 GHz - 8,486,374,508 instructions # 2.29 insn per cycle - 1.309600698 seconds time elapsed +TOTAL : 1.281959 sec + 3,704,600,058 cycles # 2.882 GHz + 8,486,385,133 instructions # 2.29 insn per cycle + 1.285885098 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79991) (512y: 91) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest.exe @@ -216,14 +216,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.623189e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.623772e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.623772e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.663239e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.663889e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.663889e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.463361 sec - 2,697,367,089 cycles # 1.839 GHz - 4,274,143,132 instructions # 1.58 insn per cycle - 1.467446249 seconds time elapsed +TOTAL : 1.446962 sec + 2,696,700,654 cycles # 1.860 GHz + 4,274,559,971 instructions # 1.59 insn per cycle + 1.451147700 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2: 2284) (512y: 105) (512z:79105) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index c51993cada..4570a77a9f 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_19:13:56 +DATE: 2023-11-09_17:50:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.063023e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.063394e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.063534e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.067332e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.067722e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.067853e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.424921 sec - 8,232,683,158 cycles # 2.990 GHz - 17,655,317,796 instructions # 2.14 insn per cycle - 2.812107310 seconds time elapsed +TOTAL : 2.421722 sec + 8,395,936,189 cycles # 3.053 GHz + 18,623,375,460 instructions # 2.22 insn per cycle + 2.807666336 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.268141e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.269954e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.270195e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.274592e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.276551e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.276737e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.993040 sec - 12,961,046,511 cycles # 3.002 GHz - 29,041,240,897 instructions # 2.24 insn per cycle - 4.374135451 seconds time elapsed +TOTAL : 3.997048 sec + 13,290,560,155 cycles # 3.077 GHz + 29,230,575,077 instructions # 2.20 insn per cycle + 4.378333342 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.423791e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.424026e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.424026e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.641666e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.641939e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.641939e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.281065 sec - 18,737,351,960 cycles # 2.982 GHz - 53,924,990,961 instructions # 2.88 insn per cycle - 6.285160496 seconds time elapsed +TOTAL : 6.117499 sec + 18,785,945,280 cycles # 3.070 GHz + 53,927,524,861 instructions # 2.87 insn per cycle + 6.121375903 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.617244e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.617330e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.617330e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.649159e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.649256e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.649256e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.274940 sec - 9,810,206,221 cycles # 2.993 GHz - 27,090,315,670 instructions # 2.76 insn per cycle - 3.279033724 seconds time elapsed +TOTAL : 3.206425 sec + 9,787,082,067 cycles # 3.050 GHz + 27,089,817,225 instructions # 2.77 insn per cycle + 3.210577008 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96284) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.504500e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.504945e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.504945e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.558533e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.558987e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.558987e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.511756 sec - 4,249,692,377 cycles # 2.805 GHz - 9,561,658,782 instructions # 2.25 insn per cycle - 1.515796071 seconds time elapsed +TOTAL : 1.489258 sec + 4,261,284,391 cycles # 2.855 GHz + 9,561,306,757 instructions # 2.24 insn per cycle + 1.493274617 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84478) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.067567e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.068141e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.068141e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.116449e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.116994e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.116994e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.304248 sec - 3,697,935,435 cycles # 2.828 GHz - 8,485,512,243 instructions # 2.29 insn per cycle - 1.308302011 seconds time elapsed +TOTAL : 1.287600 sec + 3,697,517,464 cycles # 2.864 GHz + 8,485,532,294 instructions # 2.29 insn per cycle + 1.291548783 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:80014) (512y: 241) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.626044e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.626572e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.626572e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.666755e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.667279e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.667279e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.462511 sec - 2,704,261,685 cycles # 1.846 GHz - 4,277,565,036 instructions # 1.58 insn per cycle - 1.466688212 seconds time elapsed +TOTAL : 1.444368 sec + 2,694,896,725 cycles # 1.862 GHz + 4,276,159,790 instructions # 1.59 insn per cycle + 1.448419547 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2: 2169) (512y: 187) (512z:79110) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 0a60ba6d62..4a0d02936a 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_19:14:59 +DATE: 2023-11-09_17:51:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.757584e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.758488e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.758845e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.745896e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.746749e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.746990e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.659896 sec - 5,702,631,198 cycles # 2.947 GHz - 11,810,983,379 instructions # 2.07 insn per cycle - 1.991424837 seconds time elapsed +TOTAL : 1.657612 sec + 5,852,337,885 cycles # 3.029 GHz + 12,128,434,322 instructions # 2.07 insn per cycle + 1.989363075 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.332515e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.333177e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.333265e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.334998e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.335676e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.335767e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.929687 sec - 6,546,483,377 cycles # 2.952 GHz - 14,155,312,120 instructions # 2.16 insn per cycle - 2.273547514 seconds time elapsed +TOTAL : 1.921239 sec + 6,689,269,410 cycles # 3.045 GHz + 13,766,829,986 instructions # 2.06 insn per cycle + 2.253627777 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.817807e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.818080e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.818080e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.077848e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.078128e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.078128e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.991502 sec - 17,897,297,418 cycles # 2.986 GHz - 53,590,305,749 instructions # 2.99 insn per cycle - 5.995609214 seconds time elapsed +TOTAL : 5.821316 sec + 17,888,760,787 cycles # 3.072 GHz + 53,591,267,283 instructions # 3.00 insn per cycle + 5.825272234 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.535145e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.535592e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.535592e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.576360e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.576807e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.576807e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.499015 sec - 4,559,682,745 cycles # 3.035 GHz - 13,762,791,022 instructions # 3.02 insn per cycle - 1.503172123 seconds time elapsed +TOTAL : 1.480982 sec + 4,560,162,627 cycles # 3.072 GHz + 13,762,313,674 instructions # 3.02 insn per cycle + 1.485020552 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.101340e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.103065e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.103065e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.154943e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.156669e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.156669e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.748885 sec - 2,136,693,329 cycles # 2.841 GHz - 4,817,082,222 instructions # 2.25 insn per cycle - 0.752876610 seconds time elapsed +TOTAL : 0.743454 sec + 2,138,545,582 cycles # 2.865 GHz + 4,816,682,793 instructions # 2.25 insn per cycle + 0.747370846 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.112158e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.114365e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.114365e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.228374e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.230533e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.230533e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.656308 sec - 1,869,942,366 cycles # 2.835 GHz - 4,274,318,244 instructions # 2.29 insn per cycle - 0.660301551 seconds time elapsed +TOTAL : 0.646748 sec + 1,869,005,080 cycles # 2.875 GHz + 4,273,904,960 instructions # 2.29 insn per cycle + 0.650625419 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.296564e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.298817e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.298817e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.373581e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.376135e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.376135e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.728798 sec - 1,352,736,555 cycles # 1.847 GHz - 2,158,877,197 instructions # 1.60 insn per cycle - 0.732817833 seconds time elapsed +TOTAL : 0.721971 sec + 1,354,973,724 cycles # 1.868 GHz + 2,158,504,507 instructions # 1.59 insn per cycle + 0.726042839 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2878) (512y: 49) (512z:79298) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 17034b30a2..b3edd3819c 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_19:39:36 +DATE: 2023-11-09_18:14:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.806522e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.808414e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.808414e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.797007e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.798750e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.798750e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187094e-05 +- 9.825664e-06 ) GeV^-6 -TOTAL : 1.595844 sec - 5,598,060,641 cycles # 2.994 GHz - 11,899,085,664 instructions # 2.13 insn per cycle - 1.927316991 seconds time elapsed +TOTAL : 1.595659 sec + 5,717,240,119 cycles # 3.061 GHz + 12,288,497,969 instructions # 2.15 insn per cycle + 1.924944467 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe -p 1 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.306726e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.320071e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.320071e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.290056e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.302765e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.302765e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856441e-04 +- 8.331096e-05 ) GeV^-6 -TOTAL : 1.896426 sec - 6,482,998,335 cycles # 2.990 GHz - 13,087,346,923 instructions # 2.02 insn per cycle - 2.228516012 seconds time elapsed +TOTAL : 1.886551 sec + 6,639,132,324 cycles # 3.056 GHz + 14,322,788,387 instructions # 2.16 insn per cycle + 2.229781396 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.982697e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.982966e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.982966e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.171261e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.171565e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.171565e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.882902 sec - 17,886,003,642 cycles # 3.039 GHz - 53,589,820,489 instructions # 3.00 insn per cycle - 5.886864227 seconds time elapsed +TOTAL : 5.764943 sec + 17,824,241,728 cycles # 3.090 GHz + 53,589,840,001 instructions # 3.01 insn per cycle + 5.768783827 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20207) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest.exe @@ -132,14 +132,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.517559e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.518006e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.518006e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.577193e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.577612e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.577612e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.508545 sec - 4,560,262,414 cycles # 3.016 GHz - 13,763,353,615 instructions # 3.02 insn per cycle - 1.512732617 seconds time elapsed +TOTAL : 1.481390 sec + 4,567,533,848 cycles # 3.077 GHz + 13,763,213,169 instructions # 3.01 insn per cycle + 1.485335177 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96986) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.047943e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.049624e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.049624e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.234763e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.236470e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.236470e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.755133 sec - 2,153,006,129 cycles # 2.839 GHz - 4,818,213,561 instructions # 2.24 insn per cycle - 0.759225829 seconds time elapsed +TOTAL : 0.735214 sec + 2,134,795,694 cycles # 2.891 GHz + 4,817,744,368 instructions # 2.26 insn per cycle + 0.739133829 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84904) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest.exe @@ -188,14 +188,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.134004e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.136209e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.136209e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.254949e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.257396e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.257396e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.654556 sec - 1,870,329,136 cycles # 2.842 GHz - 4,274,869,931 instructions # 2.29 insn per cycle - 0.658687365 seconds time elapsed +TOTAL : 0.644560 sec + 1,871,614,525 cycles # 2.889 GHz + 4,274,807,727 instructions # 2.28 insn per cycle + 0.648424122 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:80610) (512y: 46) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest.exe @@ -216,14 +216,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.265196e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.267580e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.267580e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.456942e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.459224e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.459224e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.732395 sec - 1,354,970,411 cycles # 1.842 GHz - 2,159,667,135 instructions # 1.59 insn per cycle - 0.736399157 seconds time elapsed +TOTAL : 0.714093 sec + 1,353,332,363 cycles # 1.886 GHz + 2,159,539,680 instructions # 1.60 insn per cycle + 0.718064585 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) 
(avx2: 2878) (512y: 49) (512z:79298) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 9247dc6a21..0346c64d8e 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_19:15:46 +DATE: 2023-11-09_17:51:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.757824e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.758656e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.758919e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.750539e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.751383e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.751707e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.658123 sec - 5,805,953,943 cycles # 3.008 GHz - 12,018,291,784 instructions # 2.07 insn per cycle - 1.988767308 seconds time elapsed +TOTAL : 1.659496 sec + 5,776,495,417 cycles # 2.991 GHz + 11,901,437,818 instructions # 2.06 insn per cycle + 2.001980183 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.327280e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.327957e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.328041e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.353072e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.353765e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.353865e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333435e-05 ) GeV^-6 -TOTAL : 1.929152 sec - 6,666,976,802 cycles # 3.013 GHz - 13,831,721,664 instructions # 2.07 insn per cycle - 2.269150647 seconds time elapsed +TOTAL : 1.912117 sec + 6,490,117,914 cycles # 2.968 GHz + 14,058,143,997 instructions # 2.17 insn per cycle + 2.245070466 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.798758e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.799028e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.799028e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.137878e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.138152e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.138152e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.002749 sec - 17,897,748,334 cycles # 2.981 GHz - 53,583,210,251 instructions # 2.99 insn per cycle - 6.006727820 seconds time elapsed +TOTAL : 5.784705 sec + 17,870,079,189 cycles # 3.088 GHz + 53,579,576,519 instructions # 3.00 insn per cycle + 5.788683996 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:20206) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.533102e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.533527e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.533527e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.609484e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.609917e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.609917e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.498905 sec - 4,550,573,846 cycles # 3.029 GHz - 13,756,139,320 instructions # 3.02 insn per cycle - 1.503009468 seconds time elapsed +TOTAL : 1.467915 sec + 4,547,996,475 cycles # 3.091 GHz + 13,755,684,665 instructions # 3.02 insn per cycle + 1.471804589 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96606) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.049905e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.051589e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.051589e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.135956e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.137601e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.137601e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.753863 sec - 2,147,980,052 cycles # 2.837 GHz - 4,819,413,658 instructions # 2.24 insn per cycle - 0.757858909 seconds time elapsed +TOTAL : 0.744886 sec + 2,148,725,562 cycles # 2.872 GHz + 4,818,942,438 instructions # 2.24 insn per cycle + 0.748866334 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:85359) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 8.121398e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.123528e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.123528e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.165432e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.167702e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.167702e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.655569 sec - 1,875,337,702 cycles # 2.847 GHz - 4,276,013,202 instructions # 2.28 insn per cycle - 0.659452126 seconds time elapsed +TOTAL : 0.651347 sec + 1,877,062,772 cycles # 2.867 GHz + 4,276,072,949 instructions # 2.28 insn per cycle + 0.655395180 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:81075) (512y: 26) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.258028e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.260328e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.260328e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.338677e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.341123e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.341123e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.732438 sec - 1,358,895,231 cycles # 1.851 GHz - 2,165,631,438 instructions # 1.59 insn per cycle - 0.736476884 seconds time elapsed +TOTAL : 0.724588 sec + 1,360,263,123 cycles # 1.868 GHz + 2,164,996,305 instructions # 1.59 insn per cycle + 0.728742359 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3475) (512y: 34) (512z:79492) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 67db6760e6..8c7934b526 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_19:16:34 +DATE: 2023-11-09_17:52:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.697393e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.698008e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.698206e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.693982e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.694475e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.694605e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.168263 sec - 7,466,161,453 cycles # 3.002 GHz - 16,782,968,221 instructions # 2.25 insn per cycle - 2.544374597 seconds time elapsed +TOTAL : 2.169924 sec + 7,570,631,130 cycles # 3.042 GHz + 15,729,510,401 instructions # 2.08 insn per cycle + 2.547214982 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.111494e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.111753e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.111788e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.111663e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.111941e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111967e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.403934 sec - 11,261,999,951 cycles # 3.015 GHz - 23,279,217,600 instructions # 2.07 insn per cycle - 3.795199307 seconds time elapsed +TOTAL : 3.399776 sec + 11,464,618,476 cycles # 3.079 GHz + 23,776,601,911 instructions # 2.07 insn per cycle + 3.779394913 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.891205e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.891420e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.891420e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.884667e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.884874e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.884874e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.695697 sec - 19,121,802,644 cycles # 2.855 GHz - 54,152,938,154 instructions # 2.83 insn per cycle - 6.699723618 seconds time elapsed +TOTAL : 6.698742 sec + 19,113,024,695 cycles # 2.852 GHz + 54,153,033,540 instructions # 2.83 insn per cycle + 6.702658032 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32066) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.589938e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.590022e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.590022e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.621402e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.621488e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621488e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.327144 sec - 9,411,187,085 cycles # 2.826 GHz - 26,159,441,613 instructions # 2.78 insn per cycle - 3.331341639 seconds time elapsed +TOTAL : 3.261482 sec + 9,398,350,643 cycles # 2.879 GHz + 26,158,977,284 instructions # 2.78 insn per cycle + 3.265504352 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:96005) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.556465e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.556911e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.556911e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.791341e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.791883e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.791883e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.488421 sec - 4,038,495,427 cycles # 2.707 GHz - 9,228,280,089 instructions # 2.29 insn per cycle - 1.492543554 seconds time elapsed +TOTAL : 1.398109 sec + 4,039,627,179 cycles # 2.883 GHz + 9,228,162,054 instructions # 2.28 insn per cycle + 1.402192827 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:84155) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.276116e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.276827e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.276827e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.351031e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.351641e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.351641e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.240270 sec - 3,525,917,357 cycles # 2.835 GHz - 8,175,363,577 instructions # 2.32 insn per cycle - 1.244573424 seconds time elapsed +TOTAL : 1.218443 sec + 3,518,124,342 cycles # 2.879 GHz + 8,175,077,517 instructions # 2.32 insn per cycle + 1.222409560 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79844) (512y: 79) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.671636e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.672174e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.672174e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.765628e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.766216e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.766216e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.443903 sec - 2,654,961,238 cycles # 1.834 GHz - 4,155,116,507 instructions # 1.57 insn per cycle - 1.448186385 seconds time elapsed +TOTAL : 1.407689 sec + 2,655,252,329 cycles # 1.882 GHz + 4,154,811,941 instructions # 1.56 insn per cycle + 1.411617738 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2045) (512y: 93) (512z:78760) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index ba876e5994..b26dd71707 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -DATE: 2023-11-03_19:17:35 +DATE: 2023-11-09_17:53:47 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.679011e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.679665e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.679866e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.674330e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.674838e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.674969e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.173579 sec - 7,474,410,637 cycles # 3.001 GHz - 15,946,585,145 instructions # 2.13 insn per cycle - 2.550103231 seconds time elapsed +TOTAL : 2.174009 sec + 7,611,935,314 cycles # 3.054 GHz + 16,836,441,609 instructions # 2.21 insn per cycle + 2.551658489 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe -p 1 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.109202e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.109461e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.109492e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.107370e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.107637e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.107663e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.402138 sec - 11,227,553,919 cycles # 3.005 GHz - 23,286,904,291 instructions # 2.07 insn per cycle - 3.792137186 seconds time elapsed +TOTAL : 3.413929 sec + 11,386,114,072 cycles # 3.048 GHz + 23,902,448,329 instructions # 2.10 insn per cycle + 3.794282526 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 7.862068e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.862272e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.862272e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.931164e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.931386e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.931386e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.723543 sec - 19,074,467,052 cycles # 2.836 GHz - 54,156,087,092 instructions # 2.84 insn per cycle - 6.727488337 seconds time elapsed +TOTAL : 6.662052 sec + 19,079,234,145 cycles # 2.863 GHz + 54,153,851,240 instructions # 2.84 insn per cycle + 6.666006074 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:32243) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.568667e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.568765e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.568765e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.620269e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.620358e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.620358e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.375716 sec - 9,382,313,393 cycles # 2.776 GHz - 26,079,058,590 instructions # 2.78 insn per cycle - 3.379999018 seconds time elapsed +TOTAL : 3.263602 sec + 9,383,434,712 cycles # 2.872 GHz + 26,078,178,648 instructions # 2.78 insn per cycle + 3.267785109 seconds time elapsed =Symbols in CPPProcess.o= (~sse4:95899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.662540e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.663002e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.663002e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.732412e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.732940e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.732940e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.448110 sec - 4,074,555,185 cycles # 2.807 GHz - 9,213,769,276 instructions # 2.26 insn per cycle - 1.452285529 seconds time elapsed +TOTAL : 1.420295 sec + 4,071,120,210 cycles # 2.859 GHz + 9,213,520,884 instructions # 2.26 insn per cycle + 1.424453149 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:83776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 4.250454e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.251202e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.251202e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.308670e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.309271e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.309271e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.248074 sec - 3,536,570,557 cycles # 2.826 GHz - 8,168,521,757 instructions # 2.31 insn per cycle - 1.252256213 seconds time elapsed +TOTAL : 1.231097 sec + 3,538,361,762 cycles # 2.867 GHz + 8,168,060,632 instructions # 2.31 insn per cycle + 1.234995598 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2:79373) (512y: 229) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.691090e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.691677e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.691677e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.830037e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.830636e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.830636e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.437256 sec - 2,622,132,529 cycles # 1.820 GHz - 4,153,851,791 instructions # 1.58 insn per cycle - 1.441375266 seconds time elapsed +TOTAL : 1.385915 sec + 2,618,303,188 cycles # 1.885 GHz + 4,153,502,106 instructions # 1.59 insn per cycle + 1.389952232 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1492) (512y: 175) (512z:78776) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 32c5e2345e..6d792821e6 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_19:11:10 +DATE: 2023-11-09_17:47:24 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.931878e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.341004e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.663503e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.838115e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.336717e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.669956e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.446607 sec - 1,970,164,515 cycles # 2.938 GHz - 2,759,248,123 instructions # 1.40 insn per cycle - 0.729204009 seconds time elapsed +TOTAL : 0.441585 sec + 1,966,591,447 cycles # 2.991 GHz + 2,767,621,879 instructions # 1.41 insn per cycle + 0.715631755 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.710415e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.163714e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.497427e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.614381e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.150528e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.499874e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.523022 sec - 2,217,601,456 cycles # 2.938 GHz - 3,205,519,009 instructions # 1.45 insn per cycle - 0.813078242 seconds time elapsed +TOTAL : 0.519980 sec + 2,272,719,542 cycles # 3.015 GHz + 3,282,015,462 instructions # 1.44 insn per cycle + 0.810626224 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.073669e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.096220e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.096220e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.097912e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.120487e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.120487e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.548155 sec - 4,698,700,649 cycles # 3.029 GHz - 13,467,797,998 instructions # 2.87 insn per cycle - 1.552304744 seconds time elapsed +TOTAL : 1.514129 sec + 4,699,091,915 cycles # 3.096 GHz + 13,466,947,436 instructions # 2.87 insn per cycle + 1.518294228 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.948763e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.021816e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.021816e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.983607e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.058142e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.058142e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.862036 sec - 2,624,478,574 cycles # 3.032 GHz - 7,556,486,050 instructions # 2.88 insn per cycle - 0.866308924 seconds time elapsed +TOTAL : 0.847498 sec + 2,625,908,011 cycles # 3.086 GHz + 7,555,492,469 instructions # 2.88 insn per cycle + 0.851823974 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.306326e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.524533e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.524533e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.394636e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.619511e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.619511e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.517734 sec - 1,480,526,951 cycles # 2.839 GHz - 3,123,082,416 instructions # 2.11 insn per cycle - 0.522085763 seconds time elapsed +TOTAL : 0.504120 sec + 1,476,957,330 cycles # 2.909 GHz + 3,122,047,526 instructions # 2.11 insn per cycle + 0.508259108 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.669407e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.933881e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.933881e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.754841e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.026481e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.026481e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.468132 sec - 1,341,729,382 cycles # 2.844 GHz - 2,984,537,487 instructions # 2.22 insn per cycle - 0.472335074 seconds time elapsed +TOTAL : 0.457617 sec + 1,342,416,487 cycles # 2.911 GHz + 2,984,161,058 instructions # 2.22 insn per cycle + 0.461673437 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.279474e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.384367e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.384367e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.547509e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.672958e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.672958e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.743758 sec - 1,327,382,690 cycles # 1.776 GHz - 1,956,119,028 instructions # 1.47 insn per cycle - 0.747985259 seconds time elapsed +TOTAL : 0.666989 sec + 1,325,861,856 cycles # 1.977 GHz + 1,955,811,920 instructions # 1.48 insn per cycle + 0.671229633 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index 83cbc116b3..8337df6649 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_19:36:51 +DATE: 2023-11-09_18:11:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.568026e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.132079e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.132079e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.580013e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.253753e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.253753e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.473711 sec - 2,006,451,769 cycles # 2.929 GHz - 2,970,353,925 instructions # 1.48 insn per cycle - 0.742629859 seconds time elapsed +TOTAL : 0.470514 sec + 2,029,905,705 cycles # 2.983 GHz + 3,022,396,069 instructions # 1.49 insn per cycle + 0.739050820 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.250433e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.283042e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.283042e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.291351e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.372563e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.372563e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.748674 sec - 3,002,657,574 cycles # 2.966 GHz - 4,543,695,427 instructions # 1.51 insn per cycle - 1.069550305 seconds time elapsed +TOTAL : 0.742777 sec + 2,970,951,255 cycles # 2.999 GHz + 4,514,637,368 instructions # 1.52 insn per cycle + 1.047584901 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fgcheck.exe 2 64 2 @@ -104,14 +104,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.069178e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.091931e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.091931e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.084622e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.107537e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.107537e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.560726 sec - 4,731,718,585 cycles # 3.025 GHz - 13,472,168,375 instructions # 2.85 insn per cycle - 1.565141837 seconds time elapsed +TOTAL : 1.538614 sec + 4,724,111,132 cycles # 3.063 GHz + 13,474,132,709 instructions # 2.85 insn per cycle + 1.542829058 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 860) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest.exe @@ -132,14 +132,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.899999e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.973174e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.973174e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.968452e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.042732e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.042732e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.892296 sec - 2,670,244,018 cycles # 2.980 GHz - 7,605,526,435 instructions # 2.85 insn per cycle - 0.896907337 seconds time elapsed +TOTAL : 0.860148 sec + 2,657,657,312 cycles # 3.076 GHz + 7,605,024,054 instructions # 2.86 insn per cycle + 0.864557816 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3095) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.091835e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.296236e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.296236e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.339093e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.562110e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.562110e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.561077 sec - 1,524,432,631 cycles # 2.698 GHz - 3,172,781,548 instructions # 2.08 insn per cycle - 0.565642937 seconds time elapsed +TOTAL : 0.520524 sec + 1,514,451,185 cycles # 2.892 GHz + 3,172,765,595 instructions # 2.09 insn per cycle + 0.524939185 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2917) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest.exe @@ -188,14 +188,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.608228e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.871141e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.871141e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.708754e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.978270e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.978270e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.483758 sec - 1,382,209,807 cycles # 2.835 GHz - 3,035,256,040 instructions # 2.20 insn per cycle - 0.488244630 seconds time elapsed +TOTAL : 0.469539 sec + 1,371,933,121 cycles # 2.899 GHz + 3,033,200,949 instructions # 2.21 insn per cycle + 0.473789571 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2694) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest.exe @@ -216,14 +216,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.425183e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.544675e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.544675e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.533145e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.657118e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.657118e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.707575 sec - 1,368,070,277 cycles # 1.923 GHz - 1,995,483,449 instructions # 1.46 insn per cycle - 0.712159059 seconds time elapsed +TOTAL : 0.676505 sec + 1,357,238,089 cycles # 1.995 GHz + 1,995,412,477 instructions # 1.47 insn per cycle + 0.680880338 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1372) (512y: 106) (512z: 2173) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 5c16312148..2ec6b9dc47 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_d_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_19:11:27 +DATE: 2023-11-09_17:47:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.898292e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.236740e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.548470e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.819082e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.206686e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.526015e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.444696 sec - 1,938,726,844 cycles # 2.937 GHz - 2,756,323,630 instructions # 1.42 insn per cycle - 0.718363875 seconds time elapsed +TOTAL : 0.443165 sec + 1,961,379,003 cycles # 2.989 GHz + 2,781,357,072 instructions # 1.42 insn per cycle + 0.713330689 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.682843e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.082328e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.409380e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.580414e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.034117e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.374431e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.522202 sec - 2,220,530,283 cycles # 2.941 GHz - 3,184,953,404 instructions # 1.43 insn per cycle - 0.811776517 seconds time elapsed +TOTAL : 0.527713 sec + 2,200,622,669 cycles # 2.860 GHz + 3,134,287,672 instructions # 1.42 insn per cycle + 0.826392715 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.070337e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092872e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092872e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.033805e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.055304e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.055304e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.552829 sec - 4,705,329,544 cycles # 3.023 GHz - 13,461,758,666 instructions # 2.86 insn per cycle - 1.556952692 seconds time elapsed +TOTAL : 1.607267 sec + 4,703,491,098 cycles # 2.920 GHz + 13,461,246,606 instructions # 2.86 insn per cycle + 1.611368977 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 849) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.948045e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.021952e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.021952e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.985735e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.061359e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.061359e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.862207 sec - 2,624,178,818 cycles # 3.031 GHz - 7,555,487,904 instructions # 2.88 insn per cycle - 0.866510467 seconds time elapsed +TOTAL : 0.845910 sec + 2,624,687,455 cycles # 3.090 GHz + 7,554,687,341 instructions # 2.88 insn per cycle + 0.850163593 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3088) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.292100e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.512278e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.512278e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.383208e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.600735e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.600735e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.519779 sec - 1,479,324,919 cycles # 2.825 GHz - 3,121,432,800 instructions # 2.11 insn per cycle - 0.524166869 seconds time elapsed +TOTAL : 0.505300 sec + 1,477,429,478 cycles # 2.904 GHz + 3,120,730,266 instructions # 2.11 insn per cycle + 0.509369657 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2900) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.586783e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.851292e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.851292e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.736623e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.003084e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.003084e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.479341 sec - 1,345,156,968 cycles # 2.785 GHz - 2,982,279,143 instructions # 2.22 insn per cycle - 0.483569808 seconds time elapsed +TOTAL : 0.460033 sec + 1,340,907,328 cycles # 2.892 GHz + 2,981,159,149 instructions # 2.22 insn per cycle + 0.464174349 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2670) (512y: 104) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.481639e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.600263e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.600263e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.537070e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.658764e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.658764e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.683909 sec - 1,326,826,217 cycles # 1.930 GHz - 1,955,120,469 instructions # 1.47 insn per cycle - 0.688253496 seconds time elapsed +TOTAL : 0.669277 sec + 1,326,031,179 cycles # 1.971 GHz + 1,954,098,862 instructions # 1.47 insn per cycle + 0.673467594 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1348) (512y: 106) (512z: 2173) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 59e9dbfb13..25d66c7041 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_19:11:45 +DATE: 2023-11-09_17:47:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.904199e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.231536e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.359887e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.746320e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.236957e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.360917e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.438419 sec - 1,915,720,301 cycles # 2.940 GHz - 2,722,845,778 instructions # 1.42 insn per cycle - 0.708695201 seconds time elapsed +TOTAL : 0.440483 sec + 1,942,018,247 cycles # 2.976 GHz + 2,734,614,888 instructions # 1.41 insn per cycle + 0.710582968 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.256707e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.834983e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.952518e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.010716e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.836484e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.960610e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.473385 sec - 2,068,832,196 cycles # 2.955 GHz - 2,965,580,704 instructions # 1.43 insn per cycle - 0.757067346 seconds time elapsed +TOTAL : 0.476376 sec + 2,093,828,578 cycles # 2.973 GHz + 2,983,215,115 instructions # 1.42 insn per cycle + 0.763925577 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.135878e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.161149e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.161149e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.150183e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175828e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.175828e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.462461 sec - 4,454,737,328 cycles # 3.039 GHz - 13,053,159,453 instructions # 2.93 insn per cycle - 1.466494148 seconds time elapsed +TOTAL : 1.444504 sec + 4,454,034,181 cycles # 3.077 GHz + 13,052,158,813 instructions # 2.93 insn per cycle + 1.448436066 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.046237e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.238088e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.238088e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.075306e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.270472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.270472e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.557296 sec - 1,699,998,155 cycles # 3.031 GHz - 4,515,681,552 instructions # 2.66 insn per cycle - 0.561435544 seconds time elapsed +TOTAL : 0.552218 sec + 1,700,873,014 cycles # 3.061 GHz + 4,515,081,496 instructions # 2.65 insn per cycle + 0.556201186 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.648399e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.355867e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.355867e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.031649e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.790374e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.790374e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.311054 sec - 851,131,460 cycles # 2.704 GHz - 1,899,263,660 instructions # 2.23 insn per cycle - 0.315235937 seconds time elapsed +TOTAL : 0.291602 sec + 850,563,357 cycles # 2.883 GHz + 1,898,510,633 instructions # 2.23 insn per cycle + 0.295657443 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.243995e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.098185e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.098185e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.014318e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.832565e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.832565e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.282792 sec - 800,211,416 cycles # 2.794 GHz - 1,822,370,089 instructions # 2.28 insn per cycle - 0.286974618 seconds time elapsed +TOTAL : 0.293482 sec + 802,625,962 cycles # 2.700 GHz + 1,821,591,063 instructions # 2.27 insn per cycle + 0.297764671 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe @@ -194,9 +194,9 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 29,217,754 cycles # 2.652 GHz - 42,284,295 instructions # 1.45 insn per cycle - 0.011406114 seconds time elapsed + 29,732,895 cycles # 2.697 GHz + 41,670,508 instructions # 1.40 insn per cycle + 0.011409242 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index f15afb12c1..687daa906c 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_19:37:10 +DATE: 2023-11-09_18:11:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -54,14 +54,14 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.572083e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.023629e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.023629e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.747186e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.237510e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.237510e+07 ) sec^-1 MeanMatrixElemValue = ( 2.017654e+01 +- 1.429184e+01 ) GeV^-2 -TOTAL : 0.454387 sec - 1,955,352,187 cycles # 2.938 GHz - 2,863,812,902 instructions # 1.46 insn per cycle - 0.722319097 seconds time elapsed +TOTAL : 0.451024 sec + 1,971,298,564 cycles # 2.989 GHz + 2,921,186,990 instructions # 1.48 insn per cycle + 0.718171404 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe -p 64 256 1 --bridge WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost @@ -80,14 +80,14 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.087118e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.599283e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.599283e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.154719e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.829239e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829239e+07 ) sec^-1 MeanMatrixElemValue = ( 2.609942e+02 +- 2.115590e+02 ) GeV^-2 -TOTAL : 0.623566 sec - 2,498,674,729 cycles # 2.923 GHz - 3,766,117,574 instructions # 1.51 insn per cycle - 0.913465239 seconds time elapsed +TOTAL : 0.620701 sec + 2,514,914,307 cycles # 2.959 GHz + 3,812,117,615 instructions # 1.52 insn per cycle + 0.908673198 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fgcheck.exe 2 64 2 @@ -104,14 +104,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.124937e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.150391e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.150391e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.130761e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.156131e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.156131e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.480709 sec - 4,471,348,915 cycles # 3.013 GHz - 13,056,806,498 instructions # 2.92 insn per cycle - 1.485019275 seconds time elapsed +TOTAL : 1.472797 sec + 4,472,979,155 cycles # 3.030 GHz + 13,056,761,338 instructions # 2.92 insn per cycle + 1.477050712 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 745) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest.exe @@ -132,14 +132,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.015036e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.208624e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.208624e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.077738e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.274919e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.274919e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.567631 sec - 1,721,622,943 cycles # 3.014 GHz - 4,563,283,810 instructions # 2.65 insn per cycle - 0.571796628 seconds time elapsed +TOTAL : 0.555619 sec + 1,722,866,665 cycles # 3.081 GHz + 4,563,322,469 instructions # 2.65 insn per cycle + 0.559797755 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3601) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest.exe @@ -160,14 +160,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.904492e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.650265e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.650265e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.956375e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.689121e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.689121e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.302253 sec - 872,846,100 cycles # 2.852 GHz - 1,935,401,156 instructions # 2.22 insn per cycle - 0.306655862 seconds time elapsed +TOTAL : 0.298686 sec + 869,037,023 cycles # 2.875 GHz + 1,935,544,426 instructions # 2.23 insn per cycle + 0.302811266 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest.exe @@ -188,14 +188,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.271441e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.120717e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.120717e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.465666e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.344453e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.344453e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.285638 sec - 819,147,203 cycles # 2.831 GHz - 1,858,340,668 instructions # 2.27 insn per cycle - 0.289825539 seconds time elapsed +TOTAL : 0.276910 sec + 817,448,595 cycles # 2.915 GHz + 1,858,610,780 instructions # 2.27 insn per cycle + 0.280974833 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3335) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest.exe @@ -211,9 +211,9 @@ OK (relative difference <= 5E-3) runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check.exe -p 64 256 10 --bridge OMP= WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions WARNING! 
Instantiate host Bridge (nevt=16384) - 37,779,421 cycles # 2.664 GHz - 50,267,131 instructions # 1.33 insn per cycle - 0.014729622 seconds time elapsed + 37,531,426 cycles # 2.805 GHz + 50,366,354 instructions # 1.34 insn per cycle + 0.013813903 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1969) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index c8e32c45f6..8bc404b84b 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_f_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_19:12:01 +DATE: 2023-11-09_17:48:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.816263e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.233557e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.356584e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.693711e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.215042e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.339602e+08 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.439029 sec - 1,906,384,387 cycles # 2.932 GHz - 2,668,630,925 instructions # 1.40 insn per cycle - 0.709025104 seconds time elapsed +TOTAL : 0.438778 sec + 1,941,964,603 cycles # 2.979 GHz + 2,729,283,404 instructions # 1.41 insn per cycle + 0.709465120 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 167 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.165457e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.788318e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.899924e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.971564e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.799531e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.917090e+08 ) sec^-1 MeanMatrixElemValue = ( 2.571361e+02 +- 2.114021e+02 ) GeV^-2 -TOTAL : 0.475153 sec - 2,060,825,458 cycles # 2.945 GHz - 2,959,751,148 instructions # 1.44 insn per cycle - 0.758667305 seconds time elapsed +TOTAL : 0.470244 sec + 2,084,172,928 cycles # 3.008 GHz + 2,971,888,877 instructions # 1.43 insn per cycle + 0.750810002 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.129555e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.154905e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.154905e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.156981e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.183336e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.183336e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.470613 sec - 4,452,780,841 cycles # 3.021 GHz - 13,033,295,085 instructions # 2.93 insn per cycle - 1.474743963 seconds time elapsed +TOTAL : 1.435690 sec + 4,451,626,158 cycles # 3.094 GHz + 13,032,987,489 instructions # 2.93 insn per cycle + 1.439578191 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.000043e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.190804e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.190804e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.129722e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.328624e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328624e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018563e+01 +- 1.429902e+01 ) GeV^-2 -TOTAL : 0.566289 sec - 1,691,331,084 cycles # 2.968 GHz - 4,511,809,710 instructions # 2.67 insn per cycle - 0.570477990 seconds time elapsed +TOTAL : 0.542754 sec + 1,689,058,698 cycles # 3.092 GHz + 4,510,968,389 instructions # 2.67 insn per cycle + 0.546880720 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3589) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 5.392978e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.034440e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.034440e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.059640e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.837369e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.837369e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.325119 sec - 853,124,200 cycles # 2.596 GHz - 1,896,337,755 instructions # 2.22 insn per cycle - 0.329328797 seconds time elapsed +TOTAL : 0.290425 sec + 852,449,044 cycles # 2.901 GHz + 1,895,470,717 instructions # 2.22 insn per cycle + 0.294595816 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3461) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 6.399192e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.280649e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.280649e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.503379e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.376998e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.376998e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.275864 sec - 799,266,525 cycles # 2.860 GHz - 1,818,357,527 instructions # 2.28 insn per cycle - 0.279975539 seconds time elapsed +TOTAL : 0.271227 sec + 799,263,402 cycles # 2.909 GHz + 1,817,410,136 instructions # 2.27 insn per cycle + 0.275264605 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3298) (512y: 22) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest.exe @@ -194,9 +194,9 @@ OK (relative difference <= 5E-3) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check.exe -p 64 256 10 OMP= WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions - 28,695,242 cycles # 2.686 GHz - 41,682,313 instructions # 1.45 insn per cycle - 0.011083970 seconds time elapsed + 28,811,890 cycles # 2.702 GHz + 40,903,960 instructions # 1.42 insn per cycle + 0.011044926 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1932) (512y: 32) (512z: 2383) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 2f090614c3..eab7ec279c 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_19:12:17 +DATE: 2023-11-09_17:48:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.924011e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.312316e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.652376e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.897435e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.394394e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.726655e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.444193 sec - 1,982,708,723 cycles # 2.960 GHz - 2,773,326,834 instructions # 1.40 insn per cycle - 0.727594315 seconds time elapsed +TOTAL : 0.442647 sec + 2,004,900,013 cycles # 3.007 GHz + 2,826,895,466 instructions # 1.41 insn per cycle + 0.724412660 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.716781e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.189044e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.525460e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.620708e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.161875e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.511766e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.521362 sec - 2,209,841,644 cycles # 2.939 GHz - 3,173,284,555 instructions # 1.44 insn per cycle - 0.811280771 seconds time elapsed +TOTAL : 0.520790 sec + 2,257,400,704 cycles # 2.997 GHz + 3,259,917,218 instructions # 1.44 insn per cycle + 0.810908697 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.069544e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.093797e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.093797e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.088540e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.110933e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110933e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.554251 sec - 4,735,824,731 cycles # 3.041 GHz - 13,470,683,397 instructions # 2.84 insn per cycle - 1.558385201 seconds time elapsed +TOTAL : 1.526966 sec + 4,723,154,452 cycles # 3.087 GHz + 13,469,602,667 instructions # 2.85 insn per cycle + 1.531097432 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 840) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.965218e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.040121e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.040121e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.988494e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.063440e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.063440e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.855114 sec - 2,601,303,673 cycles # 3.029 GHz - 7,389,579,625 instructions # 2.84 insn per cycle - 0.859411839 seconds time elapsed +TOTAL : 0.845345 sec + 2,599,329,855 cycles # 3.062 GHz + 7,388,612,618 instructions # 2.84 insn per cycle + 0.849529924 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.103178e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.304731e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.304731e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.404332e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.629825e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.629825e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.550950 sec - 1,470,989,933 cycles # 2.653 GHz - 3,058,765,662 instructions # 2.08 insn per cycle - 0.555184249 seconds time elapsed +TOTAL : 0.502979 sec + 1,466,711,057 cycles # 2.896 GHz + 3,057,623,965 instructions # 2.08 insn per cycle + 0.507143043 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 3013) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.774277e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.060098e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.060098e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.803609e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.085245e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.085245e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.455765 sec - 1,309,522,407 cycles # 2.852 GHz - 2,933,428,757 instructions # 2.24 insn per cycle - 0.459981977 seconds time elapsed +TOTAL : 0.452713 sec + 1,309,685,857 cycles # 2.871 GHz + 2,932,566,248 instructions # 2.24 insn per cycle + 0.456835979 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2799) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.411920e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.526016e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.526016e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.397391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.510097e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.510097e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.703353 sec - 1,366,582,014 cycles # 1.933 GHz - 1,972,774,215 instructions # 1.44 insn per cycle - 0.707707323 seconds time elapsed +TOTAL : 0.707515 sec + 1,366,670,273 cycles # 1.922 GHz + 1,971,774,412 instructions # 1.44 insn per cycle + 0.711692701 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1700) (512y: 114) (512z: 2171) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest.exe diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index f9fb6155f7..804124a528 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -41,7 +41,7 @@ CUDACPP_BUILDDIR='build.512z_m_inl0_hrd1' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2023-11-03_19:12:35 +DATE: 2023-11-09_17:48:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= @@ -50,14 +50,14 @@ WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.886874e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.228157e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.568514e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.811798e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.176696e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.495530e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.444068 sec - 1,946,521,853 cycles # 2.951 GHz - 2,755,422,178 instructions # 1.42 insn per cycle - 0.717280231 seconds time elapsed +TOTAL : 0.443833 sec + 2,007,951,396 cycles # 2.999 GHz + 2,822,905,809 instructions # 1.41 insn per cycle + 0.728453943 seconds time elapsed runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe -p 64 256 1 WARNING! CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 @@ -68,14 +68,14 @@ WARNING! 
CUDACPP_RUNTIME_ENABLEFPE is set: enable Floating Point Exceptions Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.675020e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.027076e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.349457e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.587196e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.041060e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.377539e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.523198 sec - 2,222,274,900 cycles # 2.946 GHz - 3,198,191,753 instructions # 1.44 insn per cycle - 0.813003520 seconds time elapsed +TOTAL : 0.523091 sec + 2,298,379,472 cycles # 2.986 GHz + 3,299,691,245 instructions # 1.44 insn per cycle + 0.827230276 seconds time elapsed ------------------------------------------------------------------------- cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/gcheck.exe --common -p 2 64 2 cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fgcheck.exe 2 64 2 @@ -91,14 +91,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.069395e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.091866e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.091866e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.081127e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.103599e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103599e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.554132 sec - 4,733,166,680 cycles # 3.039 GHz - 13,456,716,984 instructions # 2.84 insn per cycle - 1.558278315 seconds time elapsed +TOTAL : 1.537190 sec + 4,726,723,623 cycles # 3.068 GHz + 13,455,766,194 instructions # 2.85 insn per cycle + 1.541247326 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 827) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest.exe @@ -118,14 +118,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 1.963106e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.038064e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.038064e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.984806e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.061569e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.061569e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.856001 sec - 2,603,447,344 cycles # 3.028 GHz - 7,393,362,148 instructions # 2.84 insn per cycle - 0.860294166 seconds time elapsed +TOTAL : 0.846326 sec + 2,602,293,302 cycles # 3.065 GHz + 7,392,635,608 instructions # 2.84 insn per cycle + 0.850454133 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 3062) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest.exe @@ -145,14 +145,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.354162e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.573385e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.573385e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.380134e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.599128e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.599128e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.509733 sec - 1,467,381,346 cycles # 2.859 GHz - 3,058,521,485 instructions # 2.08 insn per cycle - 0.513844239 seconds time elapsed +TOTAL : 0.506085 sec + 1,466,467,612 cycles # 2.876 GHz + 3,058,106,145 instructions # 2.09 insn per cycle + 0.510457197 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2990) (512y: 0) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest.exe @@ -172,14 +172,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 3.783084e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.065773e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.065773e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.778195e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.059768e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.059768e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.454796 sec - 1,307,019,802 cycles # 2.851 GHz - 2,934,565,738 instructions # 2.25 insn per cycle - 0.459066978 seconds time elapsed +TOTAL : 0.455384 sec + 1,311,774,111 cycles # 2.858 GHz + 2,933,399,487 instructions # 2.24 insn per cycle + 0.459674797 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 2775) (512y: 110) (512z: 0) ------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest.exe @@ -199,14 +199,14 @@ Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] OMP threads / `nproc --all` = 1 / 4 -EvtsPerSec[Rmb+ME] (23) = ( 2.408065e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.519741e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.519741e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.385780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.497799e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.497799e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.704354 sec - 1,368,218,437 cycles # 1.933 GHz - 1,972,609,636 instructions # 1.44 insn per cycle - 0.708886358 seconds time elapsed +TOTAL : 0.711136 sec + 1,370,131,308 cycles # 1.917 GHz + 1,971,581,787 instructions # 1.44 insn per cycle + 0.715633425 seconds time elapsed =Symbols in CPPProcess.o= (~sse4: 0) (avx2: 1676) (512y: 114) (512z: 2171) 
------------------------------------------------------------------------- runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest.exe