From 5c03c6664c72c8d7907618d0377b2c2ea2876385 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 19 Nov 2025 01:09:02 +0100 Subject: [PATCH 01/17] Update postprocessor.py --- fixedTarget/batch/postprocessor.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index 6e0f8e0..319039c 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -1,5 +1,5 @@ import rucio_it_tools.rucio_it_register - +import os def check(j): file_list = [] for _f in j.outputfiles: @@ -12,9 +12,20 @@ def check(j): "size": _size, "adler32": _checksum }) - + metadata = { + "name" : j.name, + "ganga_id": j.id, + "completion_time" : j.time.final(), + "job_args" : j.application.args, + "nJobs" : str(len(j.subjobs)), + "creator": os.environ.get("USER"), + "comment": j.comment + } + + rucio_it_tools.rucio_it_register.register_files_with_structure( rse_name = "SHIP_TIER_0_DISK", files = file_list + metadata = metadata ) return True From c149aaeee214bff5eded31c55bc7ae09ebf79d5f Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 19 Nov 2025 01:10:07 +0100 Subject: [PATCH 02/17] Update postprocessor.py --- fixedTarget/batch/postprocessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index 319039c..26903ab 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -14,8 +14,8 @@ def check(j): }) metadata = { "name" : j.name, - "ganga_id": j.id, - "completion_time" : j.time.final(), + "ganga_id": str(j.id), + "completion_time" : str(j.time.final()), "job_args" : j.application.args, "nJobs" : str(len(j.subjobs)), "creator": os.environ.get("USER"), From 3eb580a8a1b34f63e90a910cfe19b65a5bcb9fb8 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 19 Nov 2025 01:13:39 +0100 Subject: [PATCH 03/17] Update postprocessor.py --- fixedTarget/batch/postprocessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index 26903ab..e564999 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -16,7 +16,7 @@ def check(j): "name" : j.name, "ganga_id": str(j.id), "completion_time" : str(j.time.final()), - "job_args" : j.application.args, + "job_args" : str([_a for _a in j.application.args]), "nJobs" : str(len(j.subjobs)), "creator": os.environ.get("USER"), "comment": j.comment From 91078b3e747aa2b9e37bee359bbf5496cf3e2b74 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 19 Nov 2025 01:14:13 +0100 Subject: [PATCH 04/17] Update postprocessor.py --- fixedTarget/batch/postprocessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index e564999..d95b881 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -17,7 +17,7 @@ def check(j): "ganga_id": str(j.id), "completion_time" : str(j.time.final()), "job_args" : str([_a for _a in j.application.args]), - "nJobs" : str(len(j.subjobs)), + "n_jobs" : str(len(j.subjobs)), "creator": os.environ.get("USER"), "comment": j.comment } From b8281f451192a62dca5c2e00e71a80c6a970b255 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 19 Nov 2025 04:40:14 +0100 Subject: [PATCH 05/17] Update gangaScript.py --- fixedTarget/batch/gangaScript.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fixedTarget/batch/gangaScript.py b/fixedTarget/batch/gangaScript.py index 9b54c99..3f5c1f4 100644 --- a/fixedTarget/batch/gangaScript.py +++ b/fixedTarget/batch/gangaScript.py @@ -10,8 +10,9 @@ j = Job(name = f'run fixed target production - {evtsToGen} events') j.application = Executable(exe = File('bashScript.sh'), args = ['-o', '"./"', '-n', evtsPerJob]) +# IMPORTANT: Only put the run seed in the splitter arguments j.splitter = ArgSplitter(args = [['-r', startRun + _i] for _i in range(nSJ)], append = True) -j.outputfiles = [LocalFile('*.root')] +j.outputfiles = [MassStorageFile('*.root')] j.backend = Condor() j.backend.cdf_options['+MaxRuntime'] = '1000' cc = CustomChecker(moduel = 'postprocessor.py') From 5587a954859ec51c8389aab55286bda83d1f3c15 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 19 Nov 2025 05:17:38 +0100 Subject: [PATCH 06/17] Update gangaScript.py --- fixedTarget/batch/gangaScript.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fixedTarget/batch/gangaScript.py b/fixedTarget/batch/gangaScript.py index 3f5c1f4..b2e78b9 100644 --- a/fixedTarget/batch/gangaScript.py +++ b/fixedTarget/batch/gangaScript.py @@ -1,13 +1,16 @@ import time import os import random + +# Set this path to wherever you want the output to go +config['Output']['MassStorageFile']['uploadOptions']['path'] = '/eos/lhcb/user/m/masmith/mySHiPTest' + random.seed(os.environ.get("USER")) startRun = int(time.time()) + random.randint(0,10000) evtsPerJob = 10 #100000 evtsToGen = 100 nSJ = int(evtsToGen/evtsPerJob) - j = Job(name = f'run fixed target production - {evtsToGen} events') j.application = Executable(exe = File('bashScript.sh'), args = ['-o', '"./"', '-n', evtsPerJob]) # IMPORTANT: Only put the run seed in the splitter arguments From 8abf97829f057de129f42eacd012bfddc4a9e7cf Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 19 Nov 2025 05:33:41 +0100 Subject: [PATCH 07/17] Update postprocessor.py --- fixedTarget/batch/postprocessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index d95b881..f6949ea 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -6,7 +6,7 @@ def check(j): if isinstance(_f, MassStorageFile): _loc = _f.location() _size = rucio_it_tools.rucio_it_register.get_file_on_disk_size_in_bytes(_f) - _checksum = rucio_it_tools.rucio_it_register.get_file_on_disk_size_in_bytes(_f) + _checksum = rucio_it_tools.rucio_it_register.get_file_on_disk_adler32_checksum(_f) file_list.append({ "path": _loc, "size": _size, From 93dd4c64cffecfe9641274c8615e8b14553a3913 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 19 Nov 2025 14:43:17 +0100 Subject: [PATCH 08/17] Update postprocessor.py --- fixedTarget/batch/postprocessor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index f6949ea..036a88c 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -17,6 +17,7 @@ def check(j): "ganga_id": str(j.id), "completion_time" : str(j.time.final()), "job_args" : str([_a for _a in j.application.args]), + "run_nos" : str([_sj.application.args[1] for _sj in j.subjobs]), "n_jobs" : str(len(j.subjobs)), "creator": os.environ.get("USER"), "comment": j.comment From ff46425c61803e78e50dbd1ff6234891fb023ecd Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 19 Nov 2025 15:34:59 +0100 Subject: [PATCH 09/17] update for production --- fixedTarget/batch/bashScript.sh | 15 ++++++--------- fixedTarget/batch/gangaScript.py | 6 ++++-- fixedTarget/batch/postprocessor.py | 31 +++++++++++++++++------------- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/fixedTarget/batch/bashScript.sh b/fixedTarget/batch/bashScript.sh index 866a674..365921e 100755 --- a/fixedTarget/batch/bashScript.sh +++ b/fixedTarget/batch/bashScript.sh @@ -1,18 +1,15 @@ #!/bin/bash -FS_INSTALL=/user/kskovpen/analysis/SHiP/FairShip +FS_INSTALL=/cvmfs/ship.cern.ch/25.11/ -source /cvmfs/ship.cern.ch/24.10/setUp.sh +export WORK_DIR=${FS_INSTALL}/sw/ +source ${FS_INSTALL}/slc9_x86-64/FairShip/latest/etc/profile.d/init.sh -export ALIBUILD_WORK_DIR=${FS_INSTALL}/sw +echo "INFO: Environment set up for FairShip located at " $FS_INSTALL -source ${FS_INSTALL}/../htcondor_submission_scripts/fixedTarget/batch/test_config.sh +echo "INFO: Executing: python ${FS_INSTALL}/slc9_x86-64/FairShip/latest/muonShieldOptimization/run_fixedTarget.py" $@ -echo "INFO: Environment set up for FairShip located at " $FAIRSHIP - -echo "INFO: Executing: python ${FAIRSHIP_INSTALL}/muonShieldOptimization/run_fixedTarget.py" $@ - -python ${FS_INSTALL}/muonShieldOptimization/run_fixedTarget.py $@ +python ${FS_INSTALL}/slc9_x86-64/FairShip/latest/muonShieldOptimization/run_fixedTarget.py $@ echo "INFO: Finished running. These files are on the WN:" ls -lh diff --git a/fixedTarget/batch/gangaScript.py b/fixedTarget/batch/gangaScript.py index b2e78b9..26f0959 100644 --- a/fixedTarget/batch/gangaScript.py +++ b/fixedTarget/batch/gangaScript.py @@ -8,7 +8,7 @@ random.seed(os.environ.get("USER")) startRun = int(time.time()) + random.randint(0,10000) evtsPerJob = 10 #100000 -evtsToGen = 100 +evtsToGen = 30 nSJ = int(evtsToGen/evtsPerJob) j = Job(name = f'run fixed target production - {evtsToGen} events') @@ -18,7 +18,9 @@ j.outputfiles = [MassStorageFile('*.root')] j.backend = Condor() j.backend.cdf_options['+MaxRuntime'] = '1000' -cc = CustomChecker(moduel = 'postprocessor.py') +# For running at CERN only +j.backend.cdf_options['accounting_group'] = 'group_u_SHIP.u_ship_cg' +cc = CustomChecker(module = 'postprocessor.py') j.postprocessors.append(cc) j.comment = f'{evtsPerJob} events in each of {nSJ} subjobs' j.submit() diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index 036a88c..0a761b9 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -2,22 +2,26 @@ import os def check(j): file_list = [] - for _f in j.outputfiles: - if isinstance(_f, MassStorageFile): - _loc = _f.location() - _size = rucio_it_tools.rucio_it_register.get_file_on_disk_size_in_bytes(_f) - _checksum = rucio_it_tools.rucio_it_register.get_file_on_disk_adler32_checksum(_f) - file_list.append({ - "path": _loc, - "size": _size, - "adler32": _checksum - }) + for _sj in j.subjobs: + if not _sj.status == 'completed': + print(f"WARNING: Subjobs {_sj.id} did not complete") + continue + for _f in j.outputfiles: + if isinstance(_f, MassStorageFile): + _loc = _f.location() + _size = rucio_it_tools.rucio_it_register.get_file_on_disk_size_in_bytes(_f) + _checksum = rucio_it_tools.rucio_it_register.get_file_on_disk_adler32_checksum(_f) + file_list.append({ + "path": _loc, + "size": _size, + "adler32": _checksum + }) metadata = { "name" : j.name, "ganga_id": str(j.id), "completion_time" : str(j.time.final()), "job_args" : str([_a for _a in j.application.args]), - "run_nos" : str([_sj.application.args[1] for _sj in j.subjobs]), + "run_nos" : str([(_sj.id, _sj.application.args[1]) for _sj in j.subjobs if _sj.status=='completed']), "n_jobs" : str(len(j.subjobs)), "creator": os.environ.get("USER"), "comment": j.comment @@ -26,7 +30,8 @@ def check(j): rucio_it_tools.rucio_it_register.register_files_with_structure( rse_name = "SHIP_TIER_0_DISK", - files = file_list - metadata = metadata + files = file_list, + metadata = metadata, + dry_run = True ) return True From 949afcf434c9a5c237a922bc6963bd6de020a867 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 19 Nov 2025 17:23:25 +0100 Subject: [PATCH 10/17] more updates --- fixedTarget/batch/bashScript.sh | 6 +++--- fixedTarget/batch/gangaScript.py | 6 +++--- fixedTarget/batch/postprocessor.py | 29 +++++++++++++++++++---------- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/fixedTarget/batch/bashScript.sh b/fixedTarget/batch/bashScript.sh index 365921e..5149941 100755 --- a/fixedTarget/batch/bashScript.sh +++ b/fixedTarget/batch/bashScript.sh @@ -3,13 +3,13 @@ FS_INSTALL=/cvmfs/ship.cern.ch/25.11/ export WORK_DIR=${FS_INSTALL}/sw/ -source ${FS_INSTALL}/slc9_x86-64/FairShip/latest/etc/profile.d/init.sh +source ${FS_INSTALL}/sw/slc9_x86-64/FairShip/latest/etc/profile.d/init.sh echo "INFO: Environment set up for FairShip located at " $FS_INSTALL -echo "INFO: Executing: python ${FS_INSTALL}/slc9_x86-64/FairShip/latest/muonShieldOptimization/run_fixedTarget.py" $@ +echo "INFO: Executing: python ${FS_INSTALL}/sw/slc9_x86-64/FairShip/latest/muonShieldOptimization/run_fixedTarget.py" $@ -python ${FS_INSTALL}/slc9_x86-64/FairShip/latest/muonShieldOptimization/run_fixedTarget.py $@ +python ${FS_INSTALL}/sw/slc9_x86-64/FairShip/latest/muonShieldOptimization/run_fixedTarget.py $@ echo "INFO: Finished running. These files are on the WN:" ls -lh diff --git a/fixedTarget/batch/gangaScript.py b/fixedTarget/batch/gangaScript.py index 26f0959..1065672 100644 --- a/fixedTarget/batch/gangaScript.py +++ b/fixedTarget/batch/gangaScript.py @@ -7,15 +7,15 @@ random.seed(os.environ.get("USER")) startRun = int(time.time()) + random.randint(0,10000) -evtsPerJob = 10 #100000 -evtsToGen = 30 +evtsPerJob = 100 #100000 +evtsToGen = 200 nSJ = int(evtsToGen/evtsPerJob) j = Job(name = f'run fixed target production - {evtsToGen} events') j.application = Executable(exe = File('bashScript.sh'), args = ['-o', '"./"', '-n', evtsPerJob]) # IMPORTANT: Only put the run seed in the splitter arguments j.splitter = ArgSplitter(args = [['-r', startRun + _i] for _i in range(nSJ)], append = True) -j.outputfiles = [MassStorageFile('*.root')] +j.outputfiles = [MassStorageFile('pythia8_evtgen_Geant4_*.root')] j.backend = Condor() j.backend.cdf_options['+MaxRuntime'] = '1000' # For running at CERN only diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index 0a761b9..89d65d3 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -1,13 +1,20 @@ import rucio_it_tools.rucio_it_register import os + +if 'RUCIO_CONFIG' not in os.environ: + os.environ['RUCIO_CONFIG'] = '/afs/cern.ch/user/m/masmith/rucio_test/etc/rucio.cfg' + def check(j): + # Only do this on the master job + if j.master: + continue file_list = [] for _sj in j.subjobs: if not _sj.status == 'completed': print(f"WARNING: Subjobs {_sj.id} did not complete") continue for _f in j.outputfiles: - if isinstance(_f, MassStorageFile): + if '/eos/' in _f.location(): _loc = _f.location() _size = rucio_it_tools.rucio_it_register.get_file_on_disk_size_in_bytes(_f) _checksum = rucio_it_tools.rucio_it_register.get_file_on_disk_adler32_checksum(_f) @@ -21,17 +28,19 @@ def check(j): "ganga_id": str(j.id), "completion_time" : str(j.time.final()), "job_args" : str([_a for _a in j.application.args]), - "run_nos" : str([(_sj.id, _sj.application.args[1]) for _sj in j.subjobs if _sj.status=='completed']), + "run_nos" : str([(_sj.id, _sj.application.args[-1]) for _sj in j.subjobs if _sj.status=='completed']), "n_jobs" : str(len(j.subjobs)), "creator": os.environ.get("USER"), "comment": j.comment } - - - rucio_it_tools.rucio_it_register.register_files_with_structure( - rse_name = "SHIP_TIER_0_DISK", - files = file_list, - metadata = metadata, - dry_run = True - ) + print(f"INFO: File list - {file_list}") + print(f"INFO: metadata - {metadata}") + + if len(file_list)>0: + rucio_it_tools.rucio_it_register.register_files_with_structure( + rse_name = "SHIP_TIER_0_DISK", + files = file_list, + metadata = metadata, + dry_run = True + ) return True From d7950d4d5cb39ff84f9b899a96aa0f7d6d8849b8 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Fri, 21 Nov 2025 06:24:47 +0100 Subject: [PATCH 11/17] refine --- fixedTarget/batch/README.md | 12 ++++++++++++ fixedTarget/batch/gangaScript.py | 12 +++++++++--- fixedTarget/batch/gangaSubmit.sh | 2 +- fixedTarget/batch/postprocessor.py | 22 ++++++++++++++-------- 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/fixedTarget/batch/README.md b/fixedTarget/batch/README.md index 02ddab2..1034bfd 100644 --- a/fixedTarget/batch/README.md +++ b/fixedTarget/batch/README.md @@ -1,3 +1,15 @@ +# Production submission + +1. Make sure the FairShip installation in `bashScript.sh` points to the version you want + +2. Make sure the MassStorageFile location in `gangaScript.py` points to wherever you want the data to go + +3. Make sure you have created a voms proxy for the file registration + +4. Use the gangaSubmit.sh script to submit the job. This is important - you need to use the ship version of ganga on CVMFS (not latest) + + + # Batch submission 1. Download the FairShip software: diff --git a/fixedTarget/batch/gangaScript.py b/fixedTarget/batch/gangaScript.py index 1065672..eb3b053 100644 --- a/fixedTarget/batch/gangaScript.py +++ b/fixedTarget/batch/gangaScript.py @@ -5,21 +5,27 @@ # Set this path to wherever you want the output to go config['Output']['MassStorageFile']['uploadOptions']['path'] = '/eos/lhcb/user/m/masmith/mySHiPTest' +# Now set up the random seed, how many events per subjobs and how many events total random.seed(os.environ.get("USER")) startRun = int(time.time()) + random.randint(0,10000) -evtsPerJob = 100 #100000 -evtsToGen = 200 +evtsPerJob = 10 #100000 +evtsToGen = 10 nSJ = int(evtsToGen/evtsPerJob) j = Job(name = f'run fixed target production - {evtsToGen} events') j.application = Executable(exe = File('bashScript.sh'), args = ['-o', '"./"', '-n', evtsPerJob]) + # IMPORTANT: Only put the run seed in the splitter arguments j.splitter = ArgSplitter(args = [['-r', startRun + _i] for _i in range(nSJ)], append = True) j.outputfiles = [MassStorageFile('pythia8_evtgen_Geant4_*.root')] j.backend = Condor() -j.backend.cdf_options['+MaxRuntime'] = '1000' +#j.backend.cdf_options['+MaxRuntime'] = '2000' +j.backend.cdf_options['+JobFlavour'] = '"longlunch"' + # For running at CERN only j.backend.cdf_options['accounting_group'] = 'group_u_SHIP.u_ship_cg' + +# Add in the postprocessor to do the file registration cc = CustomChecker(module = 'postprocessor.py') j.postprocessors.append(cc) j.comment = f'{evtsPerJob} events in each of {nSJ} subjobs' diff --git a/fixedTarget/batch/gangaSubmit.sh b/fixedTarget/batch/gangaSubmit.sh index e8c652b..18e8c83 100755 --- a/fixedTarget/batch/gangaSubmit.sh +++ b/fixedTarget/batch/gangaSubmit.sh @@ -1,4 +1,4 @@ #!/bin/bash -/cvmfs/ganga.cern.ch/runGanga.sh gangaScript.py +/cvmfs/ganga.cern.ch/Ganga/install/ship/bin/ganga gangaScript.py diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index 89d65d3..c6c1af2 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -1,23 +1,29 @@ +from GangaCore.GPIDev.Lib.File.MassStorageFile import MassStorageFile import rucio_it_tools.rucio_it_register import os +# This is a config file set up for SHiP if 'RUCIO_CONFIG' not in os.environ: - os.environ['RUCIO_CONFIG'] = '/afs/cern.ch/user/m/masmith/rucio_test/etc/rucio.cfg' + os.environ['RUCIO_CONFIG'] = '/cvmfs/ganga.cern.ch/Ganga/install/ship/rucio/etc/rucio.cfg' def check(j): # Only do this on the master job if j.master: - continue + print("ALL gone wrong") + return True file_list = [] for _sj in j.subjobs: if not _sj.status == 'completed': print(f"WARNING: Subjobs {_sj.id} did not complete") continue - for _f in j.outputfiles: - if '/eos/' in _f.location(): - _loc = _f.location() - _size = rucio_it_tools.rucio_it_register.get_file_on_disk_size_in_bytes(_f) - _checksum = rucio_it_tools.rucio_it_register.get_file_on_disk_adler32_checksum(_f) + for _f in _sj.outputfiles: +# print('a: ', _f.locations) + if isinstance(_f, MassStorageFile): + print('b') + _loc = _f.locations[0] + _size = rucio_it_tools.rucio_it_register.get_file_on_disk_size_in_bytes(_loc) + _checksum = rucio_it_tools.rucio_it_register.get_file_on_disk_adler32_checksum(_loc) +# print(_loc,' - ', _size, ' - ', _checksum) file_list.append({ "path": _loc, "size": _size, @@ -26,7 +32,7 @@ def check(j): metadata = { "name" : j.name, "ganga_id": str(j.id), - "completion_time" : str(j.time.final()), + "completion_time" : str(j.time.backend_final()), "job_args" : str([_a for _a in j.application.args]), "run_nos" : str([(_sj.id, _sj.application.args[-1]) for _sj in j.subjobs if _sj.status=='completed']), "n_jobs" : str(len(j.subjobs)), From 00caf2e95c079fe3850f6d0e847202de37b10aa3 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Fri, 21 Nov 2025 06:47:06 +0100 Subject: [PATCH 12/17] refine --- fixedTarget/batch/README.md | 1 + fixedTarget/batch/gangaScript.py | 2 +- fixedTarget/batch/postprocessor.py | 9 +++------ 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fixedTarget/batch/README.md b/fixedTarget/batch/README.md index 1034bfd..840a87b 100644 --- a/fixedTarget/batch/README.md +++ b/fixedTarget/batch/README.md @@ -8,6 +8,7 @@ 4. Use the gangaSubmit.sh script to submit the job. This is important - you need to use the ship version of ganga on CVMFS (not latest) +5. To monitor your jobs use this ganga: /cvmfs/ganga.cern.ch/Ganga/install/ship/bin/ganga . Don't forget to have a valid voms proxy! # Batch submission diff --git a/fixedTarget/batch/gangaScript.py b/fixedTarget/batch/gangaScript.py index eb3b053..786bde6 100644 --- a/fixedTarget/batch/gangaScript.py +++ b/fixedTarget/batch/gangaScript.py @@ -3,7 +3,7 @@ import random # Set this path to wherever you want the output to go -config['Output']['MassStorageFile']['uploadOptions']['path'] = '/eos/lhcb/user/m/masmith/mySHiPTest' +config['Output']['MassStorageFile']['uploadOptions']['path'] = '/eos/experiment/ship/test' # Now set up the random seed, how many events per subjobs and how many events total random.seed(os.environ.get("USER")) diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index c6c1af2..34294aa 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -9,7 +9,6 @@ def check(j): # Only do this on the master job if j.master: - print("ALL gone wrong") return True file_list = [] for _sj in j.subjobs: @@ -17,9 +16,7 @@ def check(j): print(f"WARNING: Subjobs {_sj.id} did not complete") continue for _f in _sj.outputfiles: -# print('a: ', _f.locations) if isinstance(_f, MassStorageFile): - print('b') _loc = _f.locations[0] _size = rucio_it_tools.rucio_it_register.get_file_on_disk_size_in_bytes(_loc) _checksum = rucio_it_tools.rucio_it_register.get_file_on_disk_adler32_checksum(_loc) @@ -39,14 +36,14 @@ def check(j): "creator": os.environ.get("USER"), "comment": j.comment } - print(f"INFO: File list - {file_list}") - print(f"INFO: metadata - {metadata}") +# print(f"INFO: File list - {file_list}") +# print(f"INFO: metadata - {metadata}") if len(file_list)>0: rucio_it_tools.rucio_it_register.register_files_with_structure( rse_name = "SHIP_TIER_0_DISK", files = file_list, metadata = metadata, - dry_run = True +# dry_run = True ) return True From cf7f57b14d0f7c3fccc021026246a8a6f978100b Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Fri, 21 Nov 2025 17:27:33 +0100 Subject: [PATCH 13/17] Update gangaScript.py --- fixedTarget/batch/gangaScript.py | 1 + 1 file changed, 1 insertion(+) diff --git a/fixedTarget/batch/gangaScript.py b/fixedTarget/batch/gangaScript.py index 786bde6..d629f4a 100644 --- a/fixedTarget/batch/gangaScript.py +++ b/fixedTarget/batch/gangaScript.py @@ -4,6 +4,7 @@ # Set this path to wherever you want the output to go config['Output']['MassStorageFile']['uploadOptions']['path'] = '/eos/experiment/ship/test' +config['Output']['MassStorageFile']['uploadOptions']['defaultProtocol'] = 'root://eospublic.cern.ch' # Now set up the random seed, how many events per subjobs and how many events total random.seed(os.environ.get("USER")) From da07c0b5db96dae14de20ba8e9e51ea747a30674 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 26 Nov 2025 14:33:26 +0100 Subject: [PATCH 14/17] Update postprocessor.py --- fixedTarget/batch/postprocessor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index 34294aa..b8388c9 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -27,7 +27,7 @@ def check(j): "adler32": _checksum }) metadata = { - "name" : j.name, + "title" : j.name, "ganga_id": str(j.id), "completion_time" : str(j.time.backend_final()), "job_args" : str([_a for _a in j.application.args]), From ba86025766dca3cc58e27bb4d77fc2b734661747 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 10 Dec 2025 01:20:23 +0100 Subject: [PATCH 15/17] fixes --- fixedTarget/batch/gangaScript.py | 6 +- fixedTarget/batch/postprocessor.py | 79 +++++++++++++---------- fixedTarget/batch/postprocessor_master.py | 58 +++++++++++++++++ 3 files changed, 108 insertions(+), 35 deletions(-) create mode 100644 fixedTarget/batch/postprocessor_master.py diff --git a/fixedTarget/batch/gangaScript.py b/fixedTarget/batch/gangaScript.py index d629f4a..6f2360f 100644 --- a/fixedTarget/batch/gangaScript.py +++ b/fixedTarget/batch/gangaScript.py @@ -3,14 +3,14 @@ import random # Set this path to wherever you want the output to go -config['Output']['MassStorageFile']['uploadOptions']['path'] = '/eos/experiment/ship/test' +config['Output']['MassStorageFile']['uploadOptions']['path'] = '/eos/experiment/ship/test/masmith_test/' config['Output']['MassStorageFile']['uploadOptions']['defaultProtocol'] = 'root://eospublic.cern.ch' # Now set up the random seed, how many events per subjobs and how many events total random.seed(os.environ.get("USER")) startRun = int(time.time()) + random.randint(0,10000) evtsPerJob = 10 #100000 -evtsToGen = 10 +evtsToGen = 20 nSJ = int(evtsToGen/evtsPerJob) j = Job(name = f'run fixed target production - {evtsToGen} events') @@ -19,6 +19,7 @@ # IMPORTANT: Only put the run seed in the splitter arguments j.splitter = ArgSplitter(args = [['-r', startRun + _i] for _i in range(nSJ)], append = True) j.outputfiles = [MassStorageFile('pythia8_evtgen_Geant4_*.root')] +#j.backend = Local() j.backend = Condor() #j.backend.cdf_options['+MaxRuntime'] = '2000' j.backend.cdf_options['+JobFlavour'] = '"longlunch"' @@ -28,6 +29,7 @@ # Add in the postprocessor to do the file registration cc = CustomChecker(module = 'postprocessor.py') +#cc = CustomChecker(module = 'postprocessor_master.py') j.postprocessors.append(cc) j.comment = f'{evtsPerJob} events in each of {nSJ} subjobs' j.submit() diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index b8388c9..70ef267 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -1,49 +1,62 @@ from GangaCore.GPIDev.Lib.File.MassStorageFile import MassStorageFile import rucio_it_tools.rucio_it_register import os +import uproot # This is a config file set up for SHiP if 'RUCIO_CONFIG' not in os.environ: os.environ['RUCIO_CONFIG'] = '/cvmfs/ganga.cern.ch/Ganga/install/ship/rucio/etc/rucio.cfg' def check(j): - # Only do this on the master job - if j.master: + # We don't want to run on the master job + if j.subjobs: + print("this is a master job") return True file_list = [] - for _sj in j.subjobs: - if not _sj.status == 'completed': - print(f"WARNING: Subjobs {_sj.id} did not complete") - continue - for _f in _sj.outputfiles: - if isinstance(_f, MassStorageFile): - _loc = _f.locations[0] - _size = rucio_it_tools.rucio_it_register.get_file_on_disk_size_in_bytes(_loc) - _checksum = rucio_it_tools.rucio_it_register.get_file_on_disk_adler32_checksum(_loc) + for _f in j.outputfiles: + if isinstance(_f, MassStorageFile): + _loc = _f.locations[0] + # Check you can open it +# try: +# _tfile = uproot.open(_loc) +# _tfile["cbmsim"] +# _tfile.close() +# except uproot.deserialization.DeserializationError: +# print(f"ERROR: Unable to open {_loc}! Job likely failed!") +# _tfile.close() +# return False + _size = rucio_it_tools.rucio_it_register.get_file_on_disk_size_in_bytes(_loc) + _checksum = rucio_it_tools.rucio_it_register.get_file_on_disk_adler32_checksum(_loc) # print(_loc,' - ', _size, ' - ', _checksum) - file_list.append({ - "path": _loc, - "size": _size, - "adler32": _checksum - }) + file_list.append({ + "path": _loc, + "size": _size, + "adler32": _checksum + }) metadata = { - "title" : j.name, - "ganga_id": str(j.id), - "completion_time" : str(j.time.backend_final()), - "job_args" : str([_a for _a in j.application.args]), - "run_nos" : str([(_sj.id, _sj.application.args[-1]) for _sj in j.subjobs if _sj.status=='completed']), - "n_jobs" : str(len(j.subjobs)), - "creator": os.environ.get("USER"), - "comment": j.comment - } -# print(f"INFO: File list - {file_list}") -# print(f"INFO: metadata - {metadata}") + "title" : j.name, + "ganga_id": str(j.fqid), + "completion_time" : str(j.time.backend_final()), + "job_args" : str([_a for _a in j.application.args]), + "run_nos" : str((j.fqid, j.application.args[-1])), + "n_jobs" : str(len(j.master.subjobs)), + "creator": os.environ.get("USER"), + "comment": j.comment + } + print(f"INFO: File list - {file_list}") + print(f"INFO: metadata - {metadata}") if len(file_list)>0: - rucio_it_tools.rucio_it_register.register_files_with_structure( - rse_name = "SHIP_TIER_0_DISK", - files = file_list, - metadata = metadata, -# dry_run = True - ) + try: + rucio_it_tools.rucio_it_register.register_files_with_structure( + rse_name = "SHIP_TIER_0_DISK", + files = file_list, + metadata = metadata, + dry_run = True # Comment this when you run it for real + ) + except: + print("ERROR: Not able to register file {file_list} with rucio") + j.force_status("failed") + j.comment += " - Rucio registration failed" + return False return True diff --git a/fixedTarget/batch/postprocessor_master.py b/fixedTarget/batch/postprocessor_master.py new file mode 100644 index 0000000..d9a6866 --- /dev/null +++ b/fixedTarget/batch/postprocessor_master.py @@ -0,0 +1,58 @@ +from GangaCore.GPIDev.Lib.File.MassStorageFile import MassStorageFile +import rucio_it_tools.rucio_it_register +import os + +# This is a config file set up for SHiP +if 'RUCIO_CONFIG' not in os.environ: + os.environ['RUCIO_CONFIG'] = '/cvmfs/ganga.cern.ch/Ganga/install/ship/rucio/etc/rucio.cfg' + +def check(j): + # Only do this on the master job + if j.master: + return True + file_list = [] + for _sj in j.subjobs: + if not _sj.status in ['completed', 'completing']: + print(f"WARNING: Subjobs {_sj.id} did not complete") + continue + for _f in _sj.outputfiles: + if isinstance(_f, MassStorageFile): + _loc = _f.locations[0] + # Check you can open it +# try: +# _tfile = uproot.open(_loc) +# _tfile.values() +# _tfile.close() +# except uproot.deserialization.DeserializationError: +# print(f"ERROR: Unable to open {_loc} from job {_sj.fqid}! Job likely failed!") +# _tfile.close() +# _sj.force_status('failed') + _size = rucio_it_tools.rucio_it_register.get_file_on_disk_size_in_bytes(_loc) + _checksum = rucio_it_tools.rucio_it_register.get_file_on_disk_adler32_checksum(_loc) +# print(_loc,' - ', _size, ' - ', _checksum) + file_list.append({ + "path": _loc, + "size": _size, + "adler32": _checksum + }) + metadata = { + "title" : j.name, + "ganga_id": str(j.id), + "completion_time" : str(j.time.backend_final()), + "job_args" : str([_a for _a in j.application.args]), + "run_nos" : str([(_sj.id, _sj.application.args[-1]) for _sj in j.subjobs if _sj.status=='completed']), + "n_jobs" : str(len(j.subjobs)), + "creator": os.environ.get("USER"), + "comment": j.comment + } +# print(f"INFO: File list - {file_list}") +# print(f"INFO: metadata - {metadata}") + + if len(file_list)>0: + rucio_it_tools.rucio_it_register.register_files_with_structure( + rse_name = "SHIP_TIER_0_DISK", + files = file_list, + metadata = metadata, +# dry_run = True + ) + return True From 4075ea5cdaa445876119e6390fe838f4037b8d83 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 10 Dec 2025 01:23:26 +0100 Subject: [PATCH 16/17] fix --- fixedTarget/batch/postprocessor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fixedTarget/batch/postprocessor.py b/fixedTarget/batch/postprocessor.py index 70ef267..ae389c9 100644 --- a/fixedTarget/batch/postprocessor.py +++ b/fixedTarget/batch/postprocessor.py @@ -43,8 +43,8 @@ def check(j): "creator": os.environ.get("USER"), "comment": j.comment } - print(f"INFO: File list - {file_list}") - print(f"INFO: metadata - {metadata}") +# print(f"INFO: File list - {file_list}") +# print(f"INFO: metadata - {metadata}") if len(file_list)>0: try: @@ -54,8 +54,8 @@ def check(j): metadata = metadata, dry_run = True # Comment this when you run it for real ) - except: - print("ERROR: Not able to register file {file_list} with rucio") + except Exception as e: + print(f"ERROR: Not able to register file {file_list} with rucio: {e}") j.force_status("failed") j.comment += " - Rucio registration failed" return False From 5149408eff39fc57daf32474eed4ac142c9b2b81 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 10 Dec 2025 19:02:08 +0100 Subject: [PATCH 17/17] fixes --- fixedTarget/batch/gangaScript.py | 4 +++- fixedTarget/batch/postprocessor_master.py | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/fixedTarget/batch/gangaScript.py b/fixedTarget/batch/gangaScript.py index 6f2360f..a35372b 100644 --- a/fixedTarget/batch/gangaScript.py +++ b/fixedTarget/batch/gangaScript.py @@ -3,7 +3,7 @@ import random # Set this path to wherever you want the output to go -config['Output']['MassStorageFile']['uploadOptions']['path'] = '/eos/experiment/ship/test/masmith_test/' +#config['Output']['MassStorageFile']['uploadOptions']['path'] = '/eos/experiment/ship/test/masmith_test/' config['Output']['MassStorageFile']['uploadOptions']['defaultProtocol'] = 'root://eospublic.cern.ch' # Now set up the random seed, how many events per subjobs and how many events total @@ -30,6 +30,8 @@ # Add in the postprocessor to do the file registration cc = CustomChecker(module = 'postprocessor.py') #cc = CustomChecker(module = 'postprocessor_master.py') +fc = FileChecker(files = ['stdout'], searchStrings = ['Macro finished successfully.'], failIfFound = False) +j.postprocessors.append(fc) j.postprocessors.append(cc) j.comment = f'{evtsPerJob} events in each of {nSJ} subjobs' j.submit() diff --git a/fixedTarget/batch/postprocessor_master.py b/fixedTarget/batch/postprocessor_master.py index d9a6866..c397f0a 100644 --- a/fixedTarget/batch/postprocessor_master.py +++ b/fixedTarget/batch/postprocessor_master.py @@ -49,10 +49,16 @@ def check(j): # print(f"INFO: metadata - {metadata}") if len(file_list)>0: - rucio_it_tools.rucio_it_register.register_files_with_structure( - rse_name = "SHIP_TIER_0_DISK", - files = file_list, - metadata = metadata, -# dry_run = True - ) + try: + rucio_it_tools.rucio_it_register.register_files_with_structure( + rse_name = "SHIP_TIER_0_DISK", + files = file_list, + metadata = metadata, + # dry_run = True + ) + except Exception as e: + print(f"ERROR: Not able to register file {file_list} with rucio: {e}") + j.force_status("failed") + j.comment += " - Rucio registration failed" + return False return True