@@ -7,6 +7,9 @@ process AliNe {
77 tag " $pipeline_name "
88 label ' aline'
99 publishDir " ${ params.outdir} " , mode: ' copy'
10+
11+ errorStrategy ' terminate' // to avoid any retry
12+ maxRetries 0 // Override global retry config - do not retry this process
1013
1114 input:
1215 val pipeline_name // String
@@ -27,13 +30,15 @@ process AliNe {
2730 script:
2831 def nxf_cmd = " nextflow run ${ pipeline_name} ${ profile} ${ config} --reads ${ reads} --reference ${ genome} ${ read_type} ${ aligner} ${ library_type} --annotation ${ annotation} --data_type rna --outdir \$ WORK_DIR/AliNe"
2932 """
33+ echo "[AliNe] Process started at \$ (date '+%Y-%m-%d %H:%M:%S')"
34+
3035 # Save absolute work directory before changing context
3136 WORK_DIR=\$ (pwd)
32-
37+
3338 # Create cache directory for resume AliNe run made from different working directory
3439 mkdir -p "${ cache_dir} "
3540 cd "${ cache_dir} "
36-
41+
3742 # Save command for reference/debugging
3843 echo "${ nxf_cmd} " > \$ WORK_DIR/nf-cmd.sh
3944
@@ -47,38 +52,47 @@ process AliNe {
4752 #SBATCH --job-name=rain_AliNe_pipeline
4853 #SBATCH --cpus-per-task=1
4954 #SBATCH --mem=4G
55+ #SBATCH --constraint=infiniband
5056 #SBATCH --time=2-00:00:00
5157 #SBATCH --output=\$ WORK_DIR/aline_%j.out
5258 #SBATCH --error=\$ WORK_DIR/aline_%j.err
5359
5460 set -euo pipefail
5561
56- cd "${ cache_dir} "
57-
62+ # Load required modules
63+ module load nextflow
64+ module load singularity
65+
5866 echo "Starting AliNe pipeline at \$ (date)"
5967 ${ nxf_cmd}
6068 echo "AliNe pipeline completed at \$ (date)"
6169 SBATCH_EOF
6270
6371 # Submit job and capture job ID
6472 JOB_ID=\$ (sbatch --parsable \$ WORK_DIR/aline_job.sh)
65- echo "[AliNe] Submitted SLURM job: \$ JOB_ID"
73+ echo "[AliNe] Submitted SLURM job: \$ JOB_ID at \$ (date '+%Y-%m-%d %H:%M:%S')"
6674 echo \$ JOB_ID > \$ WORK_DIR/aline_job_id.txt
6775
6876 # Wait for job to appear in scheduler queue
69- echo "[AliNe] Waiting for job to appear in scheduler queue..."
70- sleep 5
71-
72- RETRY=0
73- while [ \$ RETRY -lt 12 ]; do
77+ # Simple wait for job to appear or to be started
78+ echo "[AliNe] Waiting for job \$ JOB_ID to appear in queue or to be started..."
79+ while true; do
80+ # Check if job is in queue
7481 if squeue -j \$ JOB_ID 2>/dev/null | grep -q \$ JOB_ID; then
7582 echo "[AliNe] Job \$ JOB_ID is now visible in queue"
7683 break
84+ else
85+ echo "[AliNe] Job \$ JOB_ID not yet visible in queue \$ (date '+%Y-%m-%d %H:%M:%S')"
86+ fi
87+ # Check job state (crash test: if job has started or finished)
88+ JOB_STATE=\$ (sacct -j \$ JOB_ID --format=State --noheader | head -1 | tr -d ' ')
89+ if [[ "\$ JOB_STATE" =~ ^(RUNNING|COMPLETED|FAILED|CANCELLED|TIMEOUT|PREEMPTED|NODE_FAIL|OUT_OF_MEMORY)\$ ]]; then
90+ echo "[AliNe] Job \$ JOB_ID has state: \$ JOB_STATE (not visible in queue, but started or finished)"
91+ break
7792 fi
78- echo "[AliNe] Job not yet visible, waiting... (attempt \$ ((RETRY+1))/12) "
93+ echo "[AliNe] Job not yet visible, waiting..."
7994 sleep 5
80- RETRY=\$ ((RETRY+1))
81- done
95+ done # Simple wait for job to appear or to be started
8296
8397 # Wait for job completion
8498 echo "[AliNe] Waiting for job \$ JOB_ID to complete..."
@@ -88,35 +102,36 @@ process AliNe {
88102
89103 # Check job exit status
90104 JOB_STATE=\$ (sacct -j \$ JOB_ID --format=State --noheader | head -1 | tr -d ' ')
91- echo "[AliNe] Job \$ JOB_ID finished with state: \$ JOB_STATE"
105+ echo "[AliNe] Job \$ JOB_ID finished with state: \$ JOB_STATE at \$ (date '+%Y-%m-%d %H:%M:%S') "
92106
93107 if [[ "\$ JOB_STATE" != "COMPLETED" ]]; then
94- echo "[AliNe] ERROR: Job failed with state \$ JOB_STATE" >&2
95- cat \$ WORK_DIR/aline_*.err >&2 || true
108+ echo "[AliNe] ERROR: Job failed with state \$ JOB_STATE at \$ (date '+%Y-%m-%d %H:%M:%S')" >&2
109+ echo "With message (100 last lines)": >&2
110+ tail -n 100 \$ WORK_DIR/aline_\$ JOB_ID.out >&2
96111 exit 1
97112 fi
98113
99114 # Copy log for reference
100115 if [ -f .nextflow.log ]; then
101116 cp .nextflow.log \$ WORK_DIR/nextflow.log
102117 fi
103-
104- echo "[AliNe] Pipeline completed successfully via SLURM"
118+
119+ echo "[AliNe] Pipeline completed successfully via SLURM at \$ (date '+%Y-%m-%d %H:%M:%S') "
105120 else
106121 echo "[AliNe] Detected local/standard environment - running AliNe directly"
107122
108123 # Run nextflow command directly
109- ${ nxf_cmd}
110-
111- # Copy log for reference
112- if [ -f .nextflow.log ]; then
113- cp .nextflow.log \$ WORK_DIR/nextflow.log
114- fi
124+ ${ nxf_cmd} || {
125+ echo "[AliNe] ERROR: Pipeline failed at \$ (date '+%Y-%m-%d %H:%M:%S')" >&2
126+ exit 1
127+ }
115128
116- echo "[AliNe] Pipeline completed successfully (direct execution)"
129+ echo "[AliNe] Pipeline completed successfully (direct execution) at \$ (date '+%Y-%m-%d %H:%M:%S') "
117130 fi
131+
132+ echo "[AliNe] Process finished at \$ (date '+%Y-%m-%d %H:%M:%S')"
118133"""
119134
120135 output:
121136 path " AliNe" , emit: output
122- }
137+ }
0 commit comments