From 37802c092cc7155165f12355158dfcec3824d7a9 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 12:16:49 -0500 Subject: [PATCH 01/18] refactor: simplify cluster profile names, convert cluster from yaml to json --- charlie | 4 +- config/biowulf/cluster.yaml | 94 ++++++++++++++ .../cluster_status.sh | 0 config/{slurm-biowulf => biowulf}/config.yaml | 2 +- config/fnlcr/cluster.yaml | 93 ++++++++++++++ .../{slurm-fnlcr => fnlcr}/cluster_status.sh | 0 config/{slurm-fnlcr => fnlcr}/config.yaml | 2 +- config/slurm-biowulf/cluster.json | 120 ------------------ config/slurm-fnlcr/cluster.json | 119 ----------------- config/unknown/cluster.json | 119 ----------------- 10 files changed, 191 insertions(+), 362 deletions(-) create mode 100644 config/biowulf/cluster.yaml rename config/{slurm-biowulf => biowulf}/cluster_status.sh (100%) rename config/{slurm-biowulf => biowulf}/config.yaml (93%) create mode 100644 config/fnlcr/cluster.yaml rename config/{slurm-fnlcr => fnlcr}/cluster_status.sh (100%) rename config/{slurm-fnlcr => fnlcr}/config.yaml (93%) delete mode 100644 config/slurm-biowulf/cluster.json delete mode 100644 config/slurm-fnlcr/cluster.json delete mode 100644 config/unknown/cluster.json diff --git a/charlie b/charlie index 1c91cc1..fb0b2be 100755 --- a/charlie +++ b/charlie @@ -62,7 +62,7 @@ TEMP_DIR="" REFS_DIR="" CLUSTER_PROFILE="config/unknown" if [ "$PLATFORM" == "biowulf" ]; then - CLUSTER_PROFILE="config/slurm-biowulf" + CLUSTER_PROFILE="config/biowulf" PARTITION="ccr,$PARTITION" EXTRA_SINGULARITY_BINDS="/lscratch" CONDA_ACTIVATE='. 
"/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && conda activate py311' @@ -70,7 +70,7 @@ if [ "$PLATFORM" == "biowulf" ]; then TEMP_DIR='/lscratch/$SLURM_JOB_ID/' REFS_DIR="/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs/" elif [ "$PLATFORM" == "fnlcr" ]; then - CLUSTER_PROFILE="config/slurm-fnlcr" + CLUSTER_PROFILE="config/fnlcr" EXTRA_SINGULARITY_BINDS="/scratch/local" CONDA_ACTIVATE=". '/mnt/projects/CCBR-Pipelines/resources/miniconda3/etc/profile.d/conda.sh' && conda activate py311" # make sure spooker is in the path diff --git a/config/biowulf/cluster.yaml b/config/biowulf/cluster.yaml new file mode 100644 index 0000000..d3c197e --- /dev/null +++ b/config/biowulf/cluster.yaml @@ -0,0 +1,94 @@ +__default__: + gres: lscratch:256 + mem: 40g + partition: ccr,norm + threads: "2" + time: 4:00:00 + name: "{rule}.{wildcards}" + output: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out + error: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err +cutadapt: + mem: 120g + threads: "56" + time: 6:00:00 +dcc: + mem: 120g + threads: "4" + time: 4:00:00 +find_circ_align: + mem: 120g + threads: "56" + time: 6:00:00 +find_circ: + mem: 120g + threads: "56" + time: 6:00:00 +mapsplice: + mem: 200g + threads: "56" + time: 48:00:00 +mapsplice_postprocess: + mem: 120g + threads: "4" + time: 4:00:00 +nclscan: + mem: 512g + threads: "56" + time: 4:00:00 + partition: largemem +fastqc: + mem: 40g + threads: "4" + time: 4:00:00 +ciri: + mem: 512g + threads: "56" + time: 4:00:00 + partition: largemem +filter_ciri_bam_for_BSJs: + mem: 512g + threads: "4" + time: 24:00:00 + partition: largemem +create_index: + mem: 200g + threads: "56" + time: 12:00:00 +star1p: + mem: 200g + threads: "56" + time: 6:00:00 +star2p: + mem: 200g + threads: "56" + time: 6:00:00 +star_circrnafinder: + mem: 200g + threads: "56" + time: 6:00:00 +estimate_duplication: + mem: 200g + threads: "4" + time: 4:00:00 +create_circExplorer_BSJ_bam: + mem: 120g + threads: "4" + time: 4:00:00 
+create_circExplorer_linear_spliced_bams: + mem: 120g + threads: "56" + time: 8:00:00 +clear: + time: 1:00:00 +split_splice_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +split_linear_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +alignment_stats: + time: 1:00:00 +merge_per_sample: + time: 1:00:00 +merge_SJ_tabs: + time: 1:00:00 diff --git a/config/slurm-biowulf/cluster_status.sh b/config/biowulf/cluster_status.sh similarity index 100% rename from config/slurm-biowulf/cluster_status.sh rename to config/biowulf/cluster_status.sh diff --git a/config/slurm-biowulf/config.yaml b/config/biowulf/config.yaml similarity index 93% rename from config/slurm-biowulf/config.yaml rename to config/biowulf/config.yaml index 0697a63..efce53f 100644 --- a/config/slurm-biowulf/config.yaml +++ b/config/biowulf/config.yaml @@ -8,7 +8,7 @@ cluster: sbatch --output {cluster.output} --error {cluster.error} --gres {cluster.gres} -cluster-config: "cluster.json" +cluster-config: "cluster.yaml" cluster-status: "cluster_status.sh" jobs: 499 immediate-submit: false diff --git a/config/fnlcr/cluster.yaml b/config/fnlcr/cluster.yaml new file mode 100644 index 0000000..428f49f --- /dev/null +++ b/config/fnlcr/cluster.yaml @@ -0,0 +1,93 @@ +__default__: + mem: 40g + partition: norm + threads: "2" + time: 4:00:00 + name: "{rule}.{wildcards}" + output: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out + error: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err +cutadapt: + mem: 120g + threads: "32" + time: 6:00:00 +dcc: + mem: 120g + threads: "4" + time: 4:00:00 +find_circ_align: + mem: 120g + threads: "32" + time: 6:00:00 +find_circ: + mem: 120g + threads: "32" + time: 6:00:00 +mapsplice: + mem: 200g + threads: "32" + time: 48:00:00 +mapsplice_postprocess: + mem: 120g + threads: "4" + time: 4:00:00 +nclscan: + mem: 512g + threads: "32" + time: 4:00:00 + partition: largemem +fastqc: + mem: 40g + threads: "4" + time: 4:00:00 +ciri: + mem: 512g + threads: "32" + time: 4:00:00 + partition: largemem 
+filter_ciri_bam_for_BSJs: + mem: 512g + threads: "4" + time: 24:00:00 + partition: largemem +create_index: + mem: 200g + threads: "32" + time: 12:00:00 +star1p: + mem: 200g + threads: "32" + time: 6:00:00 +star2p: + mem: 200g + threads: "32" + time: 6:00:00 +star_circrnafinder: + mem: 200g + threads: "32" + time: 6:00:00 +estimate_duplication: + mem: 200g + threads: "4" + time: 4:00:00 +create_circExplorer_BSJ_bam: + mem: 120g + threads: "4" + time: 4:00:00 +create_circExplorer_linear_spliced_bams: + mem: 120g + threads: "32" + time: 8:00:00 +clear: + time: 1:00:00 +split_splice_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +split_linear_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +alignment_stats: + time: 1:00:00 +merge_per_sample: + time: 1:00:00 +merge_SJ_tabs: + time: 1:00:00 diff --git a/config/slurm-fnlcr/cluster_status.sh b/config/fnlcr/cluster_status.sh similarity index 100% rename from config/slurm-fnlcr/cluster_status.sh rename to config/fnlcr/cluster_status.sh diff --git a/config/slurm-fnlcr/config.yaml b/config/fnlcr/config.yaml similarity index 93% rename from config/slurm-fnlcr/config.yaml rename to config/fnlcr/config.yaml index 8fa374f..6f7e685 100644 --- a/config/slurm-fnlcr/config.yaml +++ b/config/fnlcr/config.yaml @@ -7,7 +7,7 @@ cluster: sbatch --job-name {cluster.name} --output {cluster.output} --error {cluster.error} -cluster-config: "cluster.json" +cluster-config: "cluster.yaml" cluster-status: "cluster_status.sh" jobs: 499 immediate-submit: false diff --git a/config/slurm-biowulf/cluster.json b/config/slurm-biowulf/cluster.json deleted file mode 100644 index 028a2e2..0000000 --- a/config/slurm-biowulf/cluster.json +++ /dev/null @@ -1,120 +0,0 @@ -{ - "__default__": { - "gres": "lscratch:256", - "mem": "40g", - "partition": "ccr,norm", - "threads": "2", - "time": "4:00:00", - "name": "{rule}.{wildcards}", - "output": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out", - "error": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" - 
}, - "cutadapt": { - "mem": "120g", - "threads": "56", - "time": "6:00:00" - }, - "dcc": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "find_circ_align": { - "mem": "120g", - "threads": "56", - "time": "6:00:00" - }, - "find_circ": { - "mem": "120g", - "threads": "56", - "time": "6:00:00" - }, - "mapsplice": { - "mem": "200g", - "threads": "56", - "time": "48:00:00" - }, - "mapsplice_postprocess": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "nclscan": { - "mem": "512g", - "threads": "56", - "time": "4:00:00", - "partition": "largemem" - }, - "fastqc": { - "mem": "40g", - "threads": "4", - "time": "4:00:00" - }, - "ciri": { - "mem": "512g", - "threads": "56", - "time": "4:00:00", - "partition": "largemem" - }, - "filter_ciri_bam_for_BSJs": { - "mem": "512g", - "threads": "4", - "time": "24:00:00", - "partition": "largemem" - }, - "create_index": { - "mem": "200g", - "threads": "56", - "time": "12:00:00" - }, - "star1p": { - "mem": "200g", - "threads": "56", - "time": "6:00:00" - }, - "star2p": { - "mem": "200g", - "threads": "56", - "time": "6:00:00" - }, - "star_circrnafinder": { - "mem": "200g", - "threads": "56", - "time": "6:00:00" - }, - "estimate_duplication": { - "mem": "200g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_BSJ_bam": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_linear_spliced_bams": { - "mem": "120g", - "threads": "56", - "time": "8:00:00" - }, - "clear": { - "time": "1:00:00" - }, - "split_splice_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "split_linear_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "alignment_stats": { - "time": "1:00:00" - }, - "merge_per_sample": { - "time": "1:00:00" - }, - "merge_SJ_tabs": { - "time": "1:00:00" - } -} diff --git a/config/slurm-fnlcr/cluster.json b/config/slurm-fnlcr/cluster.json deleted file mode 100644 index fbc50f9..0000000 --- 
a/config/slurm-fnlcr/cluster.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "__default__": { - "mem": "40g", - "partition": "norm", - "threads": "2", - "time": "4:00:00", - "name": "{rule}.{wildcards}", - "output": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out", - "error": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" - }, - "cutadapt": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "dcc": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "find_circ_align": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "find_circ": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "mapsplice": { - "mem": "200g", - "threads": "32", - "time": "48:00:00" - }, - "mapsplice_postprocess": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "nclscan": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "fastqc": { - "mem": "40g", - "threads": "4", - "time": "4:00:00" - }, - "ciri": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "filter_ciri_bam_for_BSJs": { - "mem": "512g", - "threads": "4", - "time": "24:00:00", - "partition": "largemem" - }, - "create_index": { - "mem": "200g", - "threads": "32", - "time": "12:00:00" - }, - "star1p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star2p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star_circrnafinder": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "estimate_duplication": { - "mem": "200g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_BSJ_bam": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_linear_spliced_bams": { - "mem": "120g", - "threads": "32", - "time": "8:00:00" - }, - "clear": { - "time": "1:00:00" - }, - "split_splice_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "split_linear_reads_BAM_create_BW": { - "mem": "120g", - "time": 
"24:00:00" - }, - "alignment_stats": { - "time": "1:00:00" - }, - "merge_per_sample": { - "time": "1:00:00" - }, - "merge_SJ_tabs": { - "time": "1:00:00" - } -} diff --git a/config/unknown/cluster.json b/config/unknown/cluster.json deleted file mode 100644 index fbc50f9..0000000 --- a/config/unknown/cluster.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "__default__": { - "mem": "40g", - "partition": "norm", - "threads": "2", - "time": "4:00:00", - "name": "{rule}.{wildcards}", - "output": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out", - "error": "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" - }, - "cutadapt": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "dcc": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "find_circ_align": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "find_circ": { - "mem": "120g", - "threads": "32", - "time": "6:00:00" - }, - "mapsplice": { - "mem": "200g", - "threads": "32", - "time": "48:00:00" - }, - "mapsplice_postprocess": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - "nclscan": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "fastqc": { - "mem": "40g", - "threads": "4", - "time": "4:00:00" - }, - "ciri": { - "mem": "512g", - "threads": "32", - "time": "4:00:00", - "partition": "largemem" - }, - "filter_ciri_bam_for_BSJs": { - "mem": "512g", - "threads": "4", - "time": "24:00:00", - "partition": "largemem" - }, - "create_index": { - "mem": "200g", - "threads": "32", - "time": "12:00:00" - }, - "star1p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star2p": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "star_circrnafinder": { - "mem": "200g", - "threads": "32", - "time": "6:00:00" - }, - "estimate_duplication": { - "mem": "200g", - "threads": "4", - "time": "4:00:00" - }, - "create_circExplorer_BSJ_bam": { - "mem": "120g", - "threads": "4", - "time": "4:00:00" - }, - 
"create_circExplorer_linear_spliced_bams": { - "mem": "120g", - "threads": "32", - "time": "8:00:00" - }, - "clear": { - "time": "1:00:00" - }, - "split_splice_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "split_linear_reads_BAM_create_BW": { - "mem": "120g", - "time": "24:00:00" - }, - "alignment_stats": { - "time": "1:00:00" - }, - "merge_per_sample": { - "time": "1:00:00" - }, - "merge_SJ_tabs": { - "time": "1:00:00" - } -} From 06f663c7694c3ce00cc784cda1f1485148f23109 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 12:42:14 -0500 Subject: [PATCH 02/18] feat: adapt grid engine profile from https://github.com/Snakemake-Profiles/sge --- config/eddie/cluster.yaml | 93 ++++++++++++++++++++++++++++++++++++++ config/eddie/config.yaml | 26 +++++++++++ config/eddie/sge-cancel.py | 8 ++++ config/eddie/sge-status.py | 69 ++++++++++++++++++++++++++++ 4 files changed, 196 insertions(+) create mode 100644 config/eddie/cluster.yaml create mode 100644 config/eddie/config.yaml create mode 100644 config/eddie/sge-cancel.py create mode 100644 config/eddie/sge-status.py diff --git a/config/eddie/cluster.yaml b/config/eddie/cluster.yaml new file mode 100644 index 0000000..995ad93 --- /dev/null +++ b/config/eddie/cluster.yaml @@ -0,0 +1,93 @@ +__default__: + output: "logs/{rule}.{wildcards}.$JOB_ID.out" + error: "logs/{rule}.{wildcards}.$JOB_ID.err" + mem: 40g + threads: 2 + time: 4:00:00 + name: "{rule}.{wildcards}" + +cutadapt: + mem: 120g + threads: 56 + time: 6:00:00 +dcc: + mem: 120g + threads: 4 + time: 4:00:00 +find_circ_align: + mem: 120g + threads: 56 + time: 6:00:00 +find_circ: + mem: 120g + threads: 56 + time: 6:00:00 +mapsplice: + mem: 200g + threads: 56 + time: 48:00:00 +mapsplice_postprocess: + mem: 120g + threads: 4 + time: 4:00:00 +nclscan: + mem: 512g + threads: 56 + time: 4:00:00 + partition: largemem +fastqc: + mem: 40g + threads: 4 + time: 4:00:00 +ciri: + mem: 512g + threads: 56 + time: 4:00:00 + partition: largemem 
+filter_ciri_bam_for_BSJs: + mem: 512g + threads: 4 + time: 24:00:00 + partition: largemem +create_index: + mem: 200g + threads: 56 + time: 12:00:00 +star1p: + mem: 200g + threads: 56 + time: 6:00:00 +star2p: + mem: 200g + threads: 56 + time: 6:00:00 +star_circrnafinder: + mem: 200g + threads: 56 + time: 6:00:00 +estimate_duplication: + mem: 200g + threads: 4 + time: 4:00:00 +create_circExplorer_BSJ_bam: + mem: 120g + threads: 4 + time: 4:00:00 +create_circExplorer_linear_spliced_bams: + mem: 120g + threads: 56 + time: 8:00:00 +clear: + time: 1:00:00 +split_splice_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +split_linear_reads_BAM_create_BW: + mem: 120g + time: 24:00:00 +alignment_stats: + time: 1:00:00 +merge_per_sample: + time: 1:00:00 +merge_SJ_tabs: + time: 1:00:00 diff --git a/config/eddie/config.yaml b/config/eddie/config.yaml new file mode 100644 index 0000000..7718c71 --- /dev/null +++ b/config/eddie/config.yaml @@ -0,0 +1,26 @@ +cluster: qsub + -cwd + -l h_rt={cluster.time} + -l h_vmem={cluster.mem} + -pe sharedmem {cluster.threads} + -N {cluster.name} + -o {cluster.output} + -e {cluster.error} +cluster-config: "cluster.yaml" +cluster-status: "sge-status.py" +cluster-cancel: "sge-cancel.py" +cluster-cancel-nargs: 20 +max-jobs-per-second: 1 +max-status-checks-per-second: 1 +latency-wait: 60 +local-cores: 1 +jobs: 499 +immediate-submit: false +verbose: true +notemp: true +printshellcmds: true +use-singularity: true +rerun-incomplete: true +rerun-triggers: mtime +retries: 2 +keep-going: true diff --git a/config/eddie/sge-cancel.py b/config/eddie/sge-cancel.py new file mode 100644 index 0000000..54f006a --- /dev/null +++ b/config/eddie/sge-cancel.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 +import subprocess as sp +import shlex +import sys + +jobid_list = ", ".join(sys.argv[1:]) + +sp.check_call(shlex.split(f"qdel {jobid_list}")) diff --git a/config/eddie/sge-status.py b/config/eddie/sge-status.py new file mode 100644 index 0000000..3b25265 --- /dev/null 
+++ b/config/eddie/sge-status.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +import re +import subprocess as sp +import shlex +import sys +import time +import logging + +logger = logging.getLogger("__name__") +logger.setLevel(40) + +STATUS_ATTEMPTS = 20 + +jobid = int(sys.argv[1]) +job_status = "running" + +# WARNING this currently has no support for task array jobs + +for i in range(STATUS_ATTEMPTS): + # first try qstat to see if job is running + # we can use `qstat -s pr -u "*"` to check for all running and pending jobs + try: + qstat_res = sp.check_output(shlex.split(f"qstat -s pr")).decode().strip() + + # skip the header using [2:] + res = {int(x.split()[0]): x.split()[4] for x in qstat_res.splitlines()[2:]} + + # job is in an unspecified error state + if "E" in res[jobid]: + job_status = "failed" + break + + job_status = "running" + break + + except sp.CalledProcessError as e: + logger.error("qstat process error") + logger.error(e) + except KeyError as e: + # if the job has finished it won't appear in qstat and we should check qacct + # this will also provide the exit status (0 on success, 128 + exit_status on fail) + # Try getting job with scontrol instead in case sacct is misconfigured + try: + qacct_res = sp.check_output(shlex.split(f"qacct -j {jobid}")) + + exit_code = int( + re.search("exit_status ([0-9]+)", qacct_res.decode()).group(1) + ) + + if exit_code == 0: + job_status = "success" + break + + if exit_code != 0: + job_status = "failed" + break + + except sp.CalledProcessError as e: + logger.warning("qacct process error") + logger.warning(e) + if i >= STATUS_ATTEMPTS - 1: + job_status = "failed" + break + else: + # qacct can be quite slow to update on large servers + time.sleep(5) + pass + +print(job_status) From 9ab541b6219c6b0b43156c24a24d880c195635d1 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 12:43:17 -0500 Subject: [PATCH 03/18] style: fix yaml quotes --- config/biowulf/cluster.yaml | 40 
++++++++++++++++++------------------- config/fnlcr/cluster.yaml | 40 ++++++++++++++++++------------------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/config/biowulf/cluster.yaml b/config/biowulf/cluster.yaml index d3c197e..945419d 100644 --- a/config/biowulf/cluster.yaml +++ b/config/biowulf/cluster.yaml @@ -2,81 +2,81 @@ __default__: gres: lscratch:256 mem: 40g partition: ccr,norm - threads: "2" + threads: 2 time: 4:00:00 name: "{rule}.{wildcards}" - output: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out - error: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err + output: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out" + error: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" cutadapt: mem: 120g - threads: "56" + threads: 56 time: 6:00:00 dcc: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 find_circ_align: mem: 120g - threads: "56" + threads: 56 time: 6:00:00 find_circ: mem: 120g - threads: "56" + threads: 56 time: 6:00:00 mapsplice: mem: 200g - threads: "56" + threads: 56 time: 48:00:00 mapsplice_postprocess: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 nclscan: mem: 512g - threads: "56" + threads: 56 time: 4:00:00 partition: largemem fastqc: mem: 40g - threads: "4" + threads: 4 time: 4:00:00 ciri: mem: 512g - threads: "56" + threads: 56 time: 4:00:00 partition: largemem filter_ciri_bam_for_BSJs: mem: 512g - threads: "4" + threads: 4 time: 24:00:00 partition: largemem create_index: mem: 200g - threads: "56" + threads: 56 time: 12:00:00 star1p: mem: 200g - threads: "56" + threads: 56 time: 6:00:00 star2p: mem: 200g - threads: "56" + threads: 56 time: 6:00:00 star_circrnafinder: mem: 200g - threads: "56" + threads: 56 time: 6:00:00 estimate_duplication: mem: 200g - threads: "4" + threads: 4 time: 4:00:00 create_circExplorer_BSJ_bam: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 create_circExplorer_linear_spliced_bams: mem: 120g - threads: "56" + threads: 56 time: 8:00:00 clear: time: 1:00:00 diff --git 
a/config/fnlcr/cluster.yaml b/config/fnlcr/cluster.yaml index 428f49f..c1e0fb6 100644 --- a/config/fnlcr/cluster.yaml +++ b/config/fnlcr/cluster.yaml @@ -1,81 +1,81 @@ __default__: mem: 40g partition: norm - threads: "2" + threads: 2 time: 4:00:00 name: "{rule}.{wildcards}" - output: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out - error: logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err + output: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.out" + error: "logs/${{SLURM_JOBID}}.%j.{rule}.{wildcards}.err" cutadapt: mem: 120g - threads: "32" + threads: 32 time: 6:00:00 dcc: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 find_circ_align: mem: 120g - threads: "32" + threads: 32 time: 6:00:00 find_circ: mem: 120g - threads: "32" + threads: 32 time: 6:00:00 mapsplice: mem: 200g - threads: "32" + threads: 32 time: 48:00:00 mapsplice_postprocess: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 nclscan: mem: 512g - threads: "32" + threads: 32 time: 4:00:00 partition: largemem fastqc: mem: 40g - threads: "4" + threads: 4 time: 4:00:00 ciri: mem: 512g - threads: "32" + threads: 32 time: 4:00:00 partition: largemem filter_ciri_bam_for_BSJs: mem: 512g - threads: "4" + threads: 4 time: 24:00:00 partition: largemem create_index: mem: 200g - threads: "32" + threads: 32 time: 12:00:00 star1p: mem: 200g - threads: "32" + threads: 32 time: 6:00:00 star2p: mem: 200g - threads: "32" + threads: 32 time: 6:00:00 star_circrnafinder: mem: 200g - threads: "32" + threads: 32 time: 6:00:00 estimate_duplication: mem: 200g - threads: "4" + threads: 4 time: 4:00:00 create_circExplorer_BSJ_bam: mem: 120g - threads: "4" + threads: 4 time: 4:00:00 create_circExplorer_linear_spliced_bams: mem: 120g - threads: "32" + threads: 32 time: 8:00:00 clear: time: 1:00:00 From d883661891c41a145f6c8234c19686e4af83ae60 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 14:37:23 -0500 Subject: [PATCH 04/18] feat: add --platform & -m=runqsub to support eddie also refactor templating 
logic --- README.md | 1 + charlie | 250 +++++++++++++----------------- config/biowulf/submit_script.sh | 35 +++++ config/config.yaml | 31 ++-- config/eddie/submit_script.sh | 35 +++++ config/fnlcr/submit_script.sh | 35 +++++ resources/NCLscan.config.template | 15 +- 7 files changed, 238 insertions(+), 164 deletions(-) create mode 100644 config/biowulf/submit_script.sh create mode 100644 config/eddie/submit_script.sh create mode 100644 config/fnlcr/submit_script.sh diff --git a/README.md b/README.md index b1867ba..343da1d 100644 --- a/README.md +++ b/README.md @@ -181,6 +181,7 @@ Optional Arguments: --viruses|-v : supply comma-separated list of viruses at command line (--runmode=init only) --manifest|-s : absolute path to samples.tsv. This will be copied to output folder (--runmode=init only) --changegrp|-z : change group to "Ziegelbauer_lab" before running anything. Biowulf-only. Useful for correctly setting permissions. +--platform : set the HPC platform (biowulf, fnlcr, eddie). If not set, CHARLIE will try to detect the platform with scontrol. 
--help|-h : print this help diff --git a/charlie b/charlie index fb0b2be..d0e5186 100755 --- a/charlie +++ b/charlie @@ -5,12 +5,7 @@ # CHARLIE set -eo pipefail -## TODO module statements can only run on biowulf -# decide trigger -trigger="mtime" -# trigger="input" -# trigger="code" ########################################################################################## # functions @@ -36,58 +31,16 @@ function get_platform() { ########################################################################################## # initial setup ########################################################################################## - +# set defaults for global variables # set PIPELINE_HOME PIPELINE_HOME=$(readlink -f $(dirname "$0")) -# set snakefile -SNAKEFILE="${PIPELINE_HOME}/workflow/Snakefile" # get github commit tag GIT_COMMIT_TAG=$(get_git_commitid_tag $PIPELINE_HOME) -########################################################################################## -# Some more set up -########################################################################################## - PYTHONVERSION="3" SNAKEMAKEVERSION="7" -CONDA_ACTIVATE='' -PATH_PREPEND='' -MODULE_LOAD='' -PLATFORM=$(get_platform) -PARTITION='norm' -EXTRA_SINGULARITY_BINDS="" -TEMP_DIR="" -REFS_DIR="" -CLUSTER_PROFILE="config/unknown" -if [ "$PLATFORM" == "biowulf" ]; then - CLUSTER_PROFILE="config/biowulf" - PARTITION="ccr,$PARTITION" - EXTRA_SINGULARITY_BINDS="/lscratch" - CONDA_ACTIVATE='. "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && conda activate py311' - MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity; $CONDA_ACTIVATE" - TEMP_DIR='/lscratch/$SLURM_JOB_ID/' - REFS_DIR="/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs/" -elif [ "$PLATFORM" == "fnlcr" ]; then - CLUSTER_PROFILE="config/fnlcr" - EXTRA_SINGULARITY_BINDS="/scratch/local" - CONDA_ACTIVATE=". 
'/mnt/projects/CCBR-Pipelines/resources/miniconda3/etc/profile.d/conda.sh' && conda activate py311" - # make sure spooker is in the path - PATH_PREPEND='export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH"' - MODULE_LOAD="module load singularity; $PATH_PREPEND; $CONDA_ACTIVATE" - TEMP_DIR="/scratch/local/" - REFS_DIR="/mnt/projects/CCBR-Pipelines/db/charlie/fastas_gtfs/" -else - echo """WARNING: detected platform is $PLATFORM. Please edit the files in config/unknown/ & config.yaml for compatibility with your computing environment - """ -fi - -# set defaults -HOST="hg38" -ADDITIVES="ERCC" -VIRUSES="NC_009333.1" -MANIFEST="${PIPELINE_HOME}/config/samples.tsv" # set variables SCRIPTNAME="$0" @@ -156,8 +109,9 @@ Required Arguments: 2. RUNMODE : [Type: String] Valid options: * init : initialize workdir * dryrun : dry run snakemake to generate DAG - * run : run with slurm - * runlocal : run without submitting to sbatch + * run : run by submitting the job with slurm + * runqsub : run by submitting the job with qsub + * runlocal : run locally without submitting the job to a scheduler ADVANCED RUNMODES (use with caution!!) * unlock : unlock WORKDIR if locked by snakemake NEVER UNLOCK WORKDIR WHERE PIPELINE IS CURRENTLY RUNNING! * reconfig : recreate config file in WORKDIR (debugging option) EDITS TO config.yaml WILL BE LOST! @@ -172,6 +126,7 @@ Optional Arguments: --viruses|-v : supply comma-separated list of viruses at command line (--runmode=init only) --manifest|-s : absolute path to samples.tsv. This will be copied to output folder (--runmode=init only) --changegrp|-z : change group to "Ziegelbauer_lab" before running anything. Biowulf-only. Useful for correctly setting permissions. +--platform : set the HPC platform (biowulf, fnlcr, eddie). If not set, CHARLIE will try to detect the platform with scontrol. 
(--runmode=init only) --help|-h : print this help @@ -212,39 +167,34 @@ function err() { usage && cat <<< " function init() { -# create output folder -if [ -d $WORKDIR ];then err "Folder $WORKDIR already exists!"; fi -mkdir -p $WORKDIR - -# copy config resources -cp -r ${PIPELINE_HOME}/config $WORKDIR/ - -# copy config template and samples files -if [ ! -f $CONFIGFILE ];then -sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \ - -e "s/WORKDIR/${WORKDIR//\//\\/}/g" \ - -e "s/HOST/${HOST}/g" \ - -e "s/ADDITIVES/${ADDITIVES}/g" \ - -e "s/VIRUSES/${VIRUSES}/g" \ - -e "s/TEMP_DIR/${TEMP_DIR//\//\\/}/g" \ - -e "s/REFS_DIR/${REFS_DIR//\//\\/}/g" \ - -e "s|CLUSTER_PROFILE|${CLUSTER_PROFILE}|g" \ - ${PIPELINE_HOME}/config/config.yaml \ - > $CONFIGFILE -fi -if [ ! -f $WORKDIR/nclscan.config ];then -sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" -e "s/WORKDIR/${WORKDIR//\//\\/}/g" ${PIPELINE_HOME}/resources/NCLscan.config.template > $WORKDIR/nclscan.config -fi - -if [ ! -f $WORKDIR/samples.tsv ];then -cp $MANIFEST $WORKDIR/samples.tsv -fi - -#create log and stats folders -if [ ! -d $WORKDIR/logs ]; then mkdir -p $WORKDIR/logs;echo "Logs Dir: $WORKDIR/logs";fi -if [ ! -d $WORKDIR/stats ];then mkdir -p $WORKDIR/stats;echo "Stats Dir: $WORKDIR/stats";fi - -echo "Done Initializing $WORKDIR. You can now edit $WORKDIR/config.yaml and $WORKDIR/samples.tsv" + # create output folder + if [ -d $WORKDIR ];then err "Folder $WORKDIR already exists!"; fi + mkdir -p $WORKDIR + + # copy config resources + cp -r ${PIPELINE_HOME}/config $WORKDIR/ + + # copy config template and samples files + if [ ! -f $CONFIGFILE ];then + cat ${PIPELINE_HOME}/config/config.yaml |\ + envsubst '$PIPELINE_HOME $WORKDIR $HOST $ADDITIVES $VIRUSES $TEMP_DIR $REFS_DIR $CLUSTER_PROFILE' \ + > $CONFIGFILE + fi + if [ ! -f $WORKDIR/nclscan.config ];then + cat ${PIPELINE_HOME}/resources/NCLscan.config.template |\ + envsubst '$WORKDIR' |\ + > $WORKDIR/nclscan.config + fi + + if [ ! 
-f $WORKDIR/samples.tsv ];then + cp $MANIFEST $WORKDIR/samples.tsv + fi + + #create log and stats folders + if [ ! -d $WORKDIR/logs ]; then mkdir -p $WORKDIR/logs;echo "Logs Dir: $WORKDIR/logs";fi + if [ ! -d $WORKDIR/stats ];then mkdir -p $WORKDIR/stats;echo "Stats Dir: $WORKDIR/stats";fi + + echo "Done Initializing $WORKDIR. You can now edit $WORKDIR/config.yaml and $WORKDIR/samples.tsv" } @@ -301,16 +251,9 @@ function reconfig(){ # rebuild config file and replace the config.yaml in the WORKDIR # this is only for dev purposes when new key-value pairs are being added to the config file check_essential_files - sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \ - -e "s/WORKDIR/${WORKDIR//\//\\/}/g" \ - -e "s/HOST/${HOST}/g" \ - -e "s/ADDITIVES/${ADDITIVES}/g" \ - -e "s/VIRUSES/${VIRUSES}/g" \ - -e "s/TEMP_DIR/${TEMP_DIR//\//\\/}/g" \ - -e "s/REFS_DIR/${REFS_DIR//\//\\/}/g" \ - -e "s|CLUSTER_PROFILE|${CLUSTER_PROFILE}|g" \ - ${PIPELINE_HOME}/config/config.yaml \ - > $CONFIGFILE + cat ${PIPELINE_HOME}/config/config.yaml |\ + envsubst '$PIPELINE_HOME $WORKDIR $HOST $ADDITIVES $VIRUSES $TEMP_DIR $REFS_DIR $CLUSTER_PROFILE' \ + > $WORKDIR/config.yaml echo "$WORKDIR/config.yaml has been updated!" } @@ -335,6 +278,7 @@ function load_modules() { function runcheck(){ check_essential_files load_modules + set_singularity_binds } ########################################################################################## @@ -374,7 +318,7 @@ function unlock() { function set_singularity_binds() { binds=$( $PIPELINE_HOME/workflow/scripts/set_singularity_bind_paths.py ${WORKDIR}/config.yaml ${WORKDIR}/samples.tsv) - SINGULARITY_BINDS="-B $EXTRA_SINGULARITY_BINDS,$binds" + export SINGULARITY_BINDS="-B $EXTRA_SINGULARITY_BINDS,$binds" } ########################################################################################## # PRINT SINGULARITY BINDS ... 
print bound singularity folders for debugging @@ -391,7 +335,7 @@ function printbinds(){ function runlocal() { runcheck - set_singularity_binds + # TODO do not assume $SLURM_JOB_ID exists, may run on other platform without slurm e.g. eddie if [ "$SLURM_JOB_ID" == "" ];then err "runlocal can only be done on an interactive node"; exit 1; fi run "local" } @@ -402,9 +346,12 @@ function runlocal() { function runslurm() { runcheck - set_singularity_binds run "--dry-run " && run "slurm" } +function runqsub() { + runcheck + run "--dry-run " && run "qsub" +} ########################################################################################## # CREATE RUNINFO ... create runinfo.yaml in workdir @@ -508,50 +455,21 @@ function run() { --configfile $CONFIGFILE fi - elif [ "$1" == "slurm" ];then + elif [ "$1" == "slurm" ]; then preruncleanup + cat ${WORKDIR}/config/${PLATFORM}/submit_script.sh |\ + envsubst '$CLUSTER_PROFILE $CONFIGFILE $EXPORT_SING_CACHE_DIR_CMD $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ + > ${WORKDIR}/submit_script.sh + sbatch ${WORKDIR}/submit_script.sh - cat > ${WORKDIR}/submit_script.sbatch << EOF -#!/bin/bash -#SBATCH --job-name="charlie" -#SBATCH --mem=40g -#SBATCH --partition="$PARTITION" -#SBATCH --time=48:00:00 -#SBATCH --cpus-per-task=2 -#SBATCH --mail-type=BEGIN,END,FAIL - -cd \$SLURM_SUBMIT_DIR -$MODULE_LOAD -$EXPORT_SING_CACHE_DIR_CMD - -snakemake -s $SNAKEFILE \ - --directory $WORKDIR \ - --use-singularity \ - --singularity-args "$SINGULARITY_BINDS" \ - --use-envmodules \ - --printshellcmds \ - --latency-wait 300 \ - --configfile $CONFIGFILE \ - --profile $CLUSTER_PROFILE \ - -j 500 \ - --rerun-incomplete \ - --rerun-triggers $trigger \ - --retries 2 \ - --keep-going \ - --stats ${WORKDIR}/snakemake.stats \ - 2>&1 | tee ${WORKDIR}/snakemake.log - -if [ "\$?" 
-eq "0" ];then - snakemake -s $SNAKEFILE \ - --directory $WORKDIR \ - --report ${WORKDIR}/runslurm_snakemake_report.html \ - --configfile $CONFIGFILE -fi - -EOF + elif [ "$1" == "qsub" ]; then - sbatch ${WORKDIR}/submit_script.sbatch + preruncleanup + cat ${WORKDIR}/config/${PLATFORM}/submit_script.sh |\ + envsubst '$CLUSTER_PROFILE $CONFIGFILE $EXPORT_SING_CACHE_DIR_CMD $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ + > ${WORKDIR}/submit_script.sh + pushd $WORKDIR && qsub submit_script.sh && popd elif [ "$1" == "--touch" ];then @@ -636,6 +554,9 @@ function main(){ MANIFEST="${i#*=}" if [ ! -f $MANIFEST ];then err "File $MANIFEST does NOT exist!";fi ;; + -p=*|--platform=*) + PLATFORM="${i#*=}" + ;; -h|--help) usage && exit 0; ;; @@ -645,8 +566,22 @@ function main(){ esac done - WORKDIR=$(readlink -f $WORKDIR) + export WORKDIR=$(readlink -f $WORKDIR) + export PIPELINE_HOME=$(readlink -f $(dirname "$0")) + export SNAKEFILE="${PIPELINE_HOME}/workflow/Snakefile" echo "Working Dir: $WORKDIR" + if [ -z "$PLATFORM" ]; then PLATFORM=$(get_platform); fi + export PLATFORM + echo "Platform: $PLATFORM" + + # set defaults + if [ -z "$HOST" ]; then HOST="hg38"; fi + export HOST + if [ -z "$ADDITIVES" ]; then ADDITIVES="ERCC"; fi + export ADDITIVES + if [ -z "$VIRUSES" ]; then VIRUSES="NC_009333.1"; fi + export VIRUSES + export MANIFEST="${PIPELINE_HOME}/config/samples.tsv" if [[ -z "$SING_CACHE_DIR" ]]; then if [[ -d "/data/$USER" ]]; then @@ -657,10 +592,46 @@ function main(){ echo "singularity cache dir (--singcache) is not set, using ${SING_CACHE_DIR}" fi mkdir -p $SING_CACHE_DIR - EXPORT_SING_CACHE_DIR_CMD="export SINGULARITY_CACHEDIR=\"${SING_CACHE_DIR}\"" + export EXPORT_SING_CACHE_DIR_CMD="export SINGULARITY_CACHEDIR=\"${SING_CACHE_DIR}\"" # required files - CONFIGFILE="${WORKDIR}/config.yaml" + export CONFIGFILE="${WORKDIR}/config.yaml" + + # decide trigger + export trigger="mtime" + # trigger="input" + # trigger="code" + + # set variables 
based on the detected platform + if [ "$PLATFORM" == "biowulf" ]; then + CLUSTER_PROFILE="config/biowulf" + PARTITION="ccr,norm" + EXTRA_SINGULARITY_BINDS="/lscratch" + CONDA_ACTIVATE='. "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && conda activate py311' + MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity; $CONDA_ACTIVATE" + TEMP_DIR='/lscratch/$SLURM_JOB_ID/' + REFS_DIR="/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs/" + elif [ "$PLATFORM" == "fnlcr" ]; then + CLUSTER_PROFILE="config/fnlcr" + PARTITION="norm" + EXTRA_SINGULARITY_BINDS="/scratch/local" + CONDA_ACTIVATE=". '/mnt/projects/CCBR-Pipelines/resources/miniconda3/etc/profile.d/conda.sh' && conda activate py311" + # make sure spooker is in the path + PATH_PREPEND='export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH"' + MODULE_LOAD="module load singularity; $PATH_PREPEND; $CONDA_ACTIVATE" + TEMP_DIR="/scratch/local/" + REFS_DIR="/mnt/projects/CCBR-Pipelines/db/charlie/fastas_gtfs/" + elif [ "$PLATFORM" == "eddie" ]; then + # TODO fill in other variables for eddie + CLUSTER_PROFILE="config/eddie" + MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity" + TEMP_DIR="/exports/eddie/scratch/$USER" + else + CLUSTER_PROFILE="config/unknown" + echo """WARNING: detected platform is $PLATFORM. 
Please edit the files in $CLUSTER_PROFILE & config.yaml for compatibility with your computing environment + """ + fi + export PLATFORM CLUSTER_PROFILE PARTITION EXTRA_SINGULARITY_BINDS CONDA_ACTIVATE MODULE_LOAD TEMP_DIR REFS_DIR # change group to Ziegelbauer_lab before doing anything if [ "$CHANGEGRP" == "1" ]; then change_grp "$allargs"; fi @@ -670,6 +641,7 @@ function main(){ dryrun) dryrun && exit 0;; unlock) unlock && exit 0;; run) runslurm && exit 0;; + runqsub) runqsub && exit 0;; runlocal) runlocal && exit 0;; reset) reset && exit 0;; touch) touch && exit 0;; diff --git a/config/biowulf/submit_script.sh b/config/biowulf/submit_script.sh new file mode 100644 index 0000000..926718b --- /dev/null +++ b/config/biowulf/submit_script.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +#SBATCH --job-name="charlie" +#SBATCH --mem=40g +#SBATCH --partition="$PARTITION" +#SBATCH --time=48:00:00 +#SBATCH --cpus-per-task=2 +#SBATCH --mail-type=BEGIN,END,FAIL + +cd $SLURM_SUBMIT_DIR +$MODULE_LOAD +$EXPORT_SING_CACHE_DIR_CMD + +snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --use-singularity \ + --singularity-args "$SINGULARITY_BINDS" \ + --use-envmodules \ + --printshellcmds \ + --latency-wait 300 \ + --configfile $CONFIGFILE \ + --profile $CLUSTER_PROFILE \ + -j 500 \ + --rerun-incomplete \ + --rerun-triggers $trigger \ + --retries 2 \ + --keep-going \ + --stats ${WORKDIR}/snakemake.stats \ + 2>&1 | tee ${WORKDIR}/snakemake.log + +if [ "$?" -eq "0" ];then + snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --report ${WORKDIR}/runslurm_snakemake_report.html \ + --configfile $CONFIGFILE +fi diff --git a/config/config.yaml b/config/config.yaml index c94dce0..8c14a81 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,14 +1,14 @@ ## you probably need to change or comment/uncomment some of these # # The working dir... 
output will be in the results subfolder of the workdir -workdir: "WORKDIR" +workdir: "$WORKDIR" # temporary directory for intermediate files that are not saved -tempdir: "TEMP_DIR" +tempdir: "$TEMP_DIR" # tab delimited samples file ... should have the following 3 columns # sampleName path_to_R1_fastq path_to_R2_fastq -samples: "WORKDIR/samples.tsv" +samples: "$WORKDIR/samples.tsv" # Should the CLEAR pipeline be run? True or False WITHOUT quotes run_clear: True @@ -26,7 +26,7 @@ run_circRNAFinder: True # Should the NCLscan pipeline be run? True or False WITHOUT quotes # This can only be run for PE data run_nclscan: False -nclscan_config: "WORKDIR/nclscan.config" +nclscan_config: "$WORKDIR/nclscan.config" # Should we also run find_circ? True or False WITHOUT quotes run_findcirc: False @@ -38,9 +38,9 @@ findcirc_params: "--noncanonical" # host: "hg38" # additives: "ERCC" # options are ERCC and BAC16Insert # viruses: "NC_009333.1" -host: "HOST" -additives: "ADDITIVES" -viruses: "VIRUSES" +host: "$HOST" +additives: "$ADDITIVES" +viruses: "$VIRUSES" # select viruses and other (ERCC/BAC): options are # ERCC # BAC16Insert @@ -85,14 +85,13 @@ maxsize_host: 1000000000 maxsize_virus: 5000 ## you most probably dont need to change these -scriptsdir: "PIPELINE_HOME/workflow/scripts" -resourcesdir: "PIPELINE_HOME/resources" +scriptsdir: "$PIPELINE_HOME/workflow/scripts" +resourcesdir: "$PIPELINE_HOME/resources" -# default cluster -# cluster: "PIPELINE_HOME/resources/cluster.json" -cluster: "WORKDIR/CLUSTER_PROFILE/cluster.json" +# default cluster config file +cluster: "$WORKDIR/$CLUSTER_PROFILE/cluster.yaml" -adapters: "PIPELINE_HOME/resources/TruSeq_and_nextera_adapters.consolidated.fa" +adapters: "$PIPELINE_HOME/resources/TruSeq_and_nextera_adapters.consolidated.fa" circexplorer_bsj_circRNA_min_reads: 3 # in addition to "known" and "low-conf" circRNAs identified by circexplorer, we also include those found in back_spliced.bed file but not classified as known/low-conf only if 
the number of reads supporting the BSJ call is greater than this number minreadcount: 3 # this is used to filter circRNAs while creating the per-sample counts table flanksize: 18 # 18bp flank on either side of the BSJ .. used by multiple BSJ callers @@ -107,11 +106,11 @@ high_confidence_core_callers_plus_n: 1 ciri_perl_script: "/opt2/CIRI_v2.0.6/CIRI2.pl" # path in docker container # change this path to a directory containing fasta and GTF files for all host and virus genomes -fastas_gtfs_dir: "REFS_DIR" +fastas_gtfs_dir: "$REFS_DIR" annotation_lookups: - hg38: "PIPELINE_HOME/resources/hg38_2_hg19_lookup.txt" - mm39: "PIPELINE_HOME/resources/mm39_circBase_annotation_lookup.txt" + hg38: "$PIPELINE_HOME/resources/hg38_2_hg19_lookup.txt" + mm39: "$PIPELINE_HOME/resources/mm39_circBase_annotation_lookup.txt" containers: base: "docker://nciccbr/ccbr_ubuntu_base_20.04:v7" diff --git a/config/eddie/submit_script.sh b/config/eddie/submit_script.sh new file mode 100644 index 0000000..4de3ddc --- /dev/null +++ b/config/eddie/submit_script.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +#$ -N charlie +#$ -cwd +#$ -l h_rt=48:00:00 +#$ -l h_vmem=40g +#$ -pe sharedmem 2 +#$ -o logs/$JOB_NAME-$JOB_ID-$HOSTNAME.out +#$ -e logs/$JOB_NAME-$JOB_ID-$HOSTNAME.err + +$MODULE_LOAD +$EXPORT_SING_CACHE_DIR_CMD + +snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --use-singularity \ + --singularity-args "$SINGULARITY_BINDS" \ + --use-envmodules \ + --printshellcmds \ + --latency-wait 300 \ + --configfile $CONFIGFILE \ + --profile $CLUSTER_PROFILE \ + -j 500 \ + --rerun-incomplete \ + --rerun-triggers $trigger \ + --retries 2 \ + --keep-going \ + --stats ${WORKDIR}/snakemake.stats \ + 2>&1 | tee ${WORKDIR}/snakemake.log + +if [ "$?" 
-eq "0" ];then + snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --report ${WORKDIR}/runqsub_snakemake_report.html \ + --configfile $CONFIGFILE +fi diff --git a/config/fnlcr/submit_script.sh b/config/fnlcr/submit_script.sh new file mode 100644 index 0000000..926718b --- /dev/null +++ b/config/fnlcr/submit_script.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +#SBATCH --job-name="charlie" +#SBATCH --mem=40g +#SBATCH --partition="$PARTITION" +#SBATCH --time=48:00:00 +#SBATCH --cpus-per-task=2 +#SBATCH --mail-type=BEGIN,END,FAIL + +cd $SLURM_SUBMIT_DIR +$MODULE_LOAD +$EXPORT_SING_CACHE_DIR_CMD + +snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --use-singularity \ + --singularity-args "$SINGULARITY_BINDS" \ + --use-envmodules \ + --printshellcmds \ + --latency-wait 300 \ + --configfile $CONFIGFILE \ + --profile $CLUSTER_PROFILE \ + -j 500 \ + --rerun-incomplete \ + --rerun-triggers $trigger \ + --retries 2 \ + --keep-going \ + --stats ${WORKDIR}/snakemake.stats \ + 2>&1 | tee ${WORKDIR}/snakemake.log + +if [ "$?" -eq "0" ];then + snakemake -s $SNAKEFILE \ + --directory $WORKDIR \ + --report ${WORKDIR}/runslurm_snakemake_report.html \ + --configfile $CONFIGFILE +fi diff --git a/resources/NCLscan.config.template b/resources/NCLscan.config.template index 0f53d48..dd97366 100644 --- a/resources/NCLscan.config.template +++ b/resources/NCLscan.config.template @@ -7,22 +7,22 @@ NCLscan_dir = /opt2/NCLscan-1.7.0 ## The directory of references and indices ## The script "create_reference.py" would create the needed references and indices here. -NCLscan_ref_dir = WORKDIR/ref/NCLscan_index +NCLscan_ref_dir = $WORKDIR/ref/NCLscan_index ## The following four reference files can be downloaded from the GENCODE website (http://www.gencodegenes.org/). ## The reference genome sequence, eg. /path/to/GRCh37.p13.genome.fa -Reference_genome = WORKDIR/ref/ref.fa +Reference_genome = $WORKDIR/ref/ref.fa ## The gene annotation file, eg. 
/path/to/gencode.v19.annotation.gtf -Gene_annotation = WORKDIR/ref/ref.fixed.gtf +Gene_annotation = $WORKDIR/ref/ref.fixed.gtf ## The protein-coding transcript sequences, eg. /path/to/gencode.v19.pc_transcripts.fa -Protein_coding_transcripts = WORKDIR/ref/ref.transcripts.fa +Protein_coding_transcripts = $WORKDIR/ref/ref.transcripts.fa ## The long non-coding RNA transcript sequences, eg. /path/to/gencode.v19.lncRNA_transcripts.fa -lncRNA_transcripts = WORKDIR/ref/ref.dummy.fa +lncRNA_transcripts = $WORKDIR/ref/ref.dummy.fa ## External tools @@ -68,7 +68,7 @@ SeqOut_bin = {NCLscan_bin}/SeqOut ### Advanced parameters ### ########################### -## The following two parameters indicate the maximal read length (L) and fragment size of the used paired-end RNA-seq data (FASTQ files), where fragment size = 2L + insert size. +## The following two parameters indicate the maximal read length (L) and fragment size of the used paired-end RNA-seq data (FASTQ files), where fragment size = 2L + insert size. ## If L > 151, the users should change these two parameters to (L, 2L + insert size). max_read_len = 151 max_fragment_size = 500 @@ -96,6 +96,3 @@ bwa-mem-t = 56 ## NOTE: The memory usage of each blat process would be up to 4 GB! ## mp_blat_process = 56 - - - From dc80ccc20e52cd754d7198cf7e91ee94683b5356 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 14:48:09 -0500 Subject: [PATCH 05/18] docs: note new platform & runmode options --- CHANGELOG.md | 2 ++ README.md | 6 ++++-- charlie | 8 +++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f65f38f..8da2b20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,7 @@ ## CHARLIE development version +- Support Eddie (The University of Edinburgh's HPC cluster) with `--platform=eddie` and `--runmode=qsub`. (#136, @kelly-sovacool) + ## CHARLIE 0.11.1 - CHARLIE was falsely throwing a file permissions error for tempdir values containing bash variables. 
(#118, @kelly-sovacool) diff --git a/README.md b/README.md index 343da1d..ce4630e 100644 --- a/README.md +++ b/README.md @@ -164,7 +164,8 @@ Required Arguments: 2. RUNMODE : [Type: String] Valid options: * init : initialize workdir * dryrun : dry run snakemake to generate DAG - * run : run with slurm + * run : run by submitting the job with slurm + * qsub : run by submitting the job with qsub * runlocal : run without submitting to sbatch ADVANCED RUNMODES (use with caution!!) * unlock : unlock WORKDIR if locked by snakemake NEVER UNLOCK WORKDIR WHERE PIPELINE IS CURRENTLY RUNNING! @@ -172,6 +173,7 @@ Required Arguments: * reset : DELETE workdir dir and re-init it (debugging option) EDITS TO ALL FILES IN WORKDIR WILL BE LOST! * printbinds: print singularity binds (paths) * local : same as runlocal + * slurm : same as run (run with slurm) Optional Arguments: @@ -181,7 +183,7 @@ Optional Arguments: --viruses|-v : supply comma-separated list of viruses at command line (--runmode=init only) --manifest|-s : absolute path to samples.tsv. This will be copied to output folder (--runmode=init only) --changegrp|-z : change group to "Ziegelbauer_lab" before running anything. Biowulf-only. Useful for correctly setting permissions. ---platform : set the HPC platform (biowulf, fnlcr, eddie). If not set, CHARLIE will try to detect the platform with scontrol. +--platform : set the HPC platform (biowulf, fnlcr, eddie). If not set, CHARLIE will try to detect the platform with `scontrol`. --help|-h : print this help diff --git a/charlie b/charlie index d0e5186..c6f4377 100755 --- a/charlie +++ b/charlie @@ -110,7 +110,7 @@ Required Arguments: * init : initialize workdir * dryrun : dry run snakemake to generate DAG * run : run by submitting the job with slurm - * runqsub : run by submitting the job with qsub + * qsub : run by submitting the job with qsub * runlocal : run locally without submitting the job to a scheduler ADVANCED RUNMODES (use with caution!!) 
* unlock : unlock WORKDIR if locked by snakemake NEVER UNLOCK WORKDIR WHERE PIPELINE IS CURRENTLY RUNNING! @@ -118,6 +118,7 @@ Required Arguments: * reset : DELETE workdir dir and re-init it (debugging option) EDITS TO ALL FILES IN WORKDIR WILL BE LOST! * printbinds: print singularity binds (paths) * local : same as runlocal + * slurm : same as run (run with slurm) Optional Arguments: @@ -641,12 +642,13 @@ function main(){ dryrun) dryrun && exit 0;; unlock) unlock && exit 0;; run) runslurm && exit 0;; - runqsub) runqsub && exit 0;; + slurm) runslurm && exit 0;; # same as run + qsub) runqsub && exit 0;; runlocal) runlocal && exit 0;; + local) runlocal && exit 0;; # hidden option reset) reset && exit 0;; touch) touch && exit 0;; dry) dryrun && exit 0;; # hidden option - local) runlocal && exit 0;; # hidden option reconfig) reconfig && exit 0;; # hidden option for debugging printbinds) printbinds && exit 0;; # hidden option help) usage && exit 0;; # print help From 8f794b204dcd526f103b98f18380d499769a3afe Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 17 Dec 2024 14:56:45 -0500 Subject: [PATCH 06/18] docs: note sge config sources --- config/eddie/README.md | 5 +++++ config/eddie/submit_script.sh | 1 + 2 files changed, 6 insertions(+) create mode 100644 config/eddie/README.md diff --git a/config/eddie/README.md b/config/eddie/README.md new file mode 100644 index 0000000..0229f41 --- /dev/null +++ b/config/eddie/README.md @@ -0,0 +1,5 @@ +these config files were adapted from the following sources: + +- https://github.com/Snakemake-Profiles/sge/tree/e8175c52c0566f4d569e132e748568283c799f78 +- https://github.com/riboviz/riboviz/tree/476ee8c8fed775a795e08f24863adfee7355c486/jobs +- https://nf-co.re/configs/eddie/ diff --git a/config/eddie/submit_script.sh b/config/eddie/submit_script.sh index 4de3ddc..148bcef 100644 --- a/config/eddie/submit_script.sh +++ b/config/eddie/submit_script.sh @@ -7,6 +7,7 @@ #$ -o logs/$JOB_NAME-$JOB_ID-$HOSTNAME.out #$ -e 
logs/$JOB_NAME-$JOB_ID-$HOSTNAME.err +. /etc/profile.d/modules.sh $MODULE_LOAD $EXPORT_SING_CACHE_DIR_CMD From b05f54b27d1f4b3dfe1dc80012d02e4c10e38c7d Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Dec 2024 10:24:59 -0500 Subject: [PATCH 07/18] fix: create sing_cache_dir during init --- charlie | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/charlie b/charlie index c6f4377..3a860e3 100755 --- a/charlie +++ b/charlie @@ -172,6 +172,8 @@ function init() { if [ -d $WORKDIR ];then err "Folder $WORKDIR already exists!"; fi mkdir -p $WORKDIR + mkdir -p $SING_CACHE_DIR + # copy config resources cp -r ${PIPELINE_HOME}/config $WORKDIR/ @@ -592,7 +594,6 @@ function main(){ fi echo "singularity cache dir (--singcache) is not set, using ${SING_CACHE_DIR}" fi - mkdir -p $SING_CACHE_DIR export EXPORT_SING_CACHE_DIR_CMD="export SINGULARITY_CACHEDIR=\"${SING_CACHE_DIR}\"" # required files From ac602af6e4db8947a0905ecd727553c9bd457117 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Dec 2024 10:38:14 -0500 Subject: [PATCH 08/18] chore: snakemake must already be in user path for eddie --- charlie | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/charlie b/charlie index 3a860e3..bf419ae 100755 --- a/charlie +++ b/charlie @@ -626,7 +626,8 @@ function main(){ elif [ "$PLATFORM" == "eddie" ]; then # TODO fill in other variables for eddie CLUSTER_PROFILE="config/eddie" - MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity" + MODULE_LOAD="module load python/3.8 singularity" + # snakemake is already in Taka's path in his bashrc TEMP_DIR="/exports/eddie/scratch/$USER" else CLUSTER_PROFILE="config/unknown" From 1d8670ad9d567dc005a651f1c73035841f1d2f86 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Dec 2024 10:51:27 -0500 Subject: [PATCH 09/18] fix(eddie): do not specify python version --- charlie | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charlie b/charlie 
index bf419ae..a1178bf 100755 --- a/charlie +++ b/charlie @@ -626,7 +626,7 @@ function main(){ elif [ "$PLATFORM" == "eddie" ]; then # TODO fill in other variables for eddie CLUSTER_PROFILE="config/eddie" - MODULE_LOAD="module load python/3.8 singularity" + MODULE_LOAD="module load python singularity" # snakemake is already in Taka's path in his bashrc TEMP_DIR="/exports/eddie/scratch/$USER" else From 442b6cbb35a471bcd44b3f6cd96dc111a0d1edc0 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 27 Dec 2024 10:53:28 -0500 Subject: [PATCH 10/18] fix: only load singularity module for eddie --- charlie | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charlie b/charlie index a1178bf..5d1d575 100755 --- a/charlie +++ b/charlie @@ -626,8 +626,8 @@ function main(){ elif [ "$PLATFORM" == "eddie" ]; then # TODO fill in other variables for eddie CLUSTER_PROFILE="config/eddie" - MODULE_LOAD="module load python singularity" - # snakemake is already in Taka's path in his bashrc + MODULE_LOAD="module load singularity" + # python & snakemake are already in Taka's path in his bashrc TEMP_DIR="/exports/eddie/scratch/$USER" else CLUSTER_PROFILE="config/unknown" From 2a8bc5b4ac3f504b3614743942f70b45e58bb72a Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 31 Dec 2024 10:49:30 -0500 Subject: [PATCH 11/18] docs: add details to warning message for unknown platform --- charlie | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/charlie b/charlie index 5d1d575..4a635ad 100755 --- a/charlie +++ b/charlie @@ -631,7 +631,9 @@ function main(){ TEMP_DIR="/exports/eddie/scratch/$USER" else CLUSTER_PROFILE="config/unknown" - echo """WARNING: detected platform is $PLATFORM. Please edit the files in $CLUSTER_PROFILE & config.yaml for compatibility with your computing environment + echo """WARNING: detected platform is $PLATFORM. + Please edit the files in $CLUSTER_PROFILE & config.yaml for compatibility with your computing environment. 
+ Also, make sure Singularity, Snakemake $SNAKEMAKEVERSION, Python $PYTHONVERSION, and pandas are installed. """ fi export PLATFORM CLUSTER_PROFILE PARTITION EXTRA_SINGULARITY_BINDS CONDA_ACTIVATE MODULE_LOAD TEMP_DIR REFS_DIR From a89b1ff3dce23a4da8beafd1daa87c8110e97258 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 31 Dec 2024 12:20:41 -0500 Subject: [PATCH 12/18] fix(eddie): add Taka's venv to path temporary workaround until we find out how to make this work more generally --- charlie | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/charlie b/charlie index 4a635ad..594ff11 100755 --- a/charlie +++ b/charlie @@ -626,7 +626,8 @@ function main(){ elif [ "$PLATFORM" == "eddie" ]; then # TODO fill in other variables for eddie CLUSTER_PROFILE="config/eddie" - MODULE_LOAD="module load singularity" + PATH_PREPEND='export PATH="/home/ttakanob/py3.8_venv/bin/:$PATH"' + MODULE_LOAD="module load singularity; $PATH_PREPEND" # python & snakemake are already in Taka's path in his bashrc TEMP_DIR="/exports/eddie/scratch/$USER" else From 191b11e97bb6f82b0c08ce001000f243c3d35ad1 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Thu, 2 Jan 2025 14:04:03 -0500 Subject: [PATCH 13/18] fix(runlocal): only check slurm job id on biowulf and frce to enforce interactive node only --- charlie | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/charlie b/charlie index 594ff11..a712c9c 100755 --- a/charlie +++ b/charlie @@ -338,8 +338,6 @@ function printbinds(){ function runlocal() { runcheck - # TODO do not assume $SLURM_JOB_ID exists, may run on other platform without slurm e.g. 
eddie
-    if [ "$SLURM_JOB_ID" == "" ];then err "runlocal can only be done on an interactive node"; exit 1; fi
     run "local"
 }
 
@@ -431,6 +429,12 @@ function run() {
 
     if [ "$1" == "local" ];then
 
+        if [ "$PLATFORM" == "biowulf" ] || [ "$PLATFORM" == "fnlcr" ]; then
+            if [ "$SLURM_JOB_ID" == "" ]; then
+                err "runlocal can only be done on an interactive node";
+                exit 1;
+            fi
+        fi
         preruncleanup
 
         $EXPORT_SING_CACHE_DIR_CMD

From ab5ca2edc3c201938e8284272e5d9ee027b0b39e Mon Sep 17 00:00:00 2001
From: Kelly Sovacool
Date: Thu, 9 Jan 2025 14:49:53 -0500
Subject: [PATCH 14/18] fix: do not submit batch job; eddie workers do not have qsub

---
 charlie                       |  4 ++--
 config/eddie/submit_script.sh | 18 +++---------------
 2 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/charlie b/charlie
index a712c9c..08817cd 100755
--- a/charlie
+++ b/charlie
@@ -475,8 +475,8 @@ function run() {
         preruncleanup
         cat ${WORKDIR}/config/${PLATFORM}/submit_script.sh |\
             envsubst '$CLUSTER_PROFILE $CONFIGFILE $EXPORT_SING_CACHE_DIR_CMD $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \
-            > ${WORKDIR}/submit_script.sh
-        pushd $WORKDIR && qsub submit_script.sh && popd
+            > ${WORKDIR}/run_script.sh
+        pushd $WORKDIR && bash run_script.sh && popd
 
     elif [ "$1" == "--touch" ];then
 
diff --git a/config/eddie/submit_script.sh b/config/eddie/submit_script.sh
index 148bcef..52637a7 100644
--- a/config/eddie/submit_script.sh
+++ b/config/eddie/submit_script.sh
@@ -1,11 +1,6 @@
-#!/usr/bin/env bash
-#$ -N charlie
-#$ -cwd
-#$ -l h_rt=48:00:00
-#$ -l h_vmem=40g
-#$ -pe sharedmem 2
-#$ -o logs/$JOB_NAME-$JOB_ID-$HOSTNAME.out
-#$ -e logs/$JOB_NAME-$JOB_ID-$HOSTNAME.err
+#!/usr/bin/env bash
+# do not submit this script with qsub
+# as worker nodes cannot submit additional jobs themselves
 
 . /etc/profile.d/modules.sh
 $MODULE_LOAD
@@ -27,10 +22,3 @@ snakemake -s $SNAKEFILE \
     --keep-going \
     --stats ${WORKDIR}/snakemake.stats \
     2>&1 | tee ${WORKDIR}/snakemake.log
-
-if [ "$?" 
-eq "0" ];then - snakemake -s $SNAKEFILE \ - --directory $WORKDIR \ - --report ${WORKDIR}/runqsub_snakemake_report.html \ - --configfile $CONFIGFILE -fi From 8de9454323e6c104630965846c908aaeb6fe5372 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Fri, 17 Jan 2025 15:08:52 -0500 Subject: [PATCH 15/18] feat: add back custom submit & jobscripts from sge profile https://github.com/Snakemake-Profiles/sge/blob/e8175c52c0566f4d569e132e748568283c799f78/%7B%7Bcookiecutter.profile_name%7D%7D/sge-jobscript.sh --- config/eddie/sge-jobscript.sh | 7 + config/eddie/sge-submit.py | 299 ++++++++++++++++++++++++++++++++++ 2 files changed, 306 insertions(+) create mode 100644 config/eddie/sge-jobscript.sh create mode 100644 config/eddie/sge-submit.py diff --git a/config/eddie/sge-jobscript.sh b/config/eddie/sge-jobscript.sh new file mode 100644 index 0000000..e416637 --- /dev/null +++ b/config/eddie/sge-jobscript.sh @@ -0,0 +1,7 @@ +#!/bin/bash +# properties = {properties} + +# exit on first error +set -o errexit + +{exec_job} diff --git a/config/eddie/sge-submit.py b/config/eddie/sge-submit.py new file mode 100644 index 0000000..b77ce4c --- /dev/null +++ b/config/eddie/sge-submit.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python3 + +import os +import re +import math +import argparse +import subprocess + +# use warnings.warn() rather than print() to output info in this script +# because snakemake expects the jobid to be the only output +import warnings + +from snakemake import io +from snakemake.utils import read_job_properties + +DEFAULT_JOB_NAME = "snakemake_job" +QSUB_DEFAULTS = "-cwd -V" +CLUSTER_CONFIG = "cluster.yaml" + +# SGE syntax for options is `-option [value]` and for resources is `-l name=value` +# we therefore distinguish the two in this script to make it easier to handle. +# We also define some aliases for options and resources so that the rules can +# be more expressive than a list of cryptic SGE resources. 
+ +# We additionally pickup a list of environment modules which will be loaded in the +# jobscript + +OPTION_MAPPING = { + "binding": ("binding",), + "cwd": ("cwd",), + "e": ("e", "error"), + "hard": ("hard",), + "j": ("j", "join"), + "m": ("m", "mail_options"), + "M": ("M", "email"), + "notify": ("notify",), + "now": ("now",), + "N": ("N", "name"), + "o": ("o", "output"), + "P": ("P", "project"), + "p": ("p", "priority"), + "pe": ("pe", "parallel_environment"), + "pty": ("pty",), + "q": ("q", "queue"), + "R": ("R", "reservation"), + "r": ("r", "rerun"), + "soft": ("soft",), + "v": ("v", "variable"), + "V": ("V", "export_env"), +} + +RESOURCE_MAPPING = { + # default queue resources + "qname": ("qname",), + "hostname": ("hostname",), + # "notify" -- conflicts with OPTION_MAPPING + "calendar": ("calendar",), + "min_cpu_interval": ("min_cpu_interval",), + "tmpdir": ("tmpdir",), + "seq_no": ("seq_no",), + "s_rt": ("s_rt", "soft_runtime", "soft_walltime"), + "h_rt": ("h_rt", "time", "runtime", "walltime"), + "s_cpu": ("s_cpu", "soft_cpu"), + "h_cpu": ("h_cpu", "cpu"), + "s_data": ("s_data", "soft_data"), + "h_data": ("h_data", "data"), + "s_stack": ("s_stack", "soft_stack"), + "h_stack": ("h_stack", "stack"), + "s_core": ("s_core", "soft_core"), + "h_core": ("h_core", "core"), + "s_rss": ("s_rss", "soft_resident_set_size"), + "h_rss": ("h_rss", "resident_set_size"), + # default host resources + "slots": ("slots",), + "s_vmem": ("s_vmem", "soft_memory", "soft_virtual_memory"), + # "mem_mb" is a default snakemake resource name which will be passed in + "h_vmem": ("h_vmem", "mem_mb", "mem", "memory", "virtual_memory"), + "s_fsize": ("s_fsize", "soft_file_size"), + # "disk_mb" is a default snakemake resource name which will be passed in + "h_fsize": ("h_fsize", "disk_mb", "file_size"), +} + +IGNORED_RESOURCES = ["mem_mib", "disk_mib"] + +NONREQUESTABLE_RESOURCES = ["tmpdir"] + + +def add_custom_resources(resources, resource_mapping=RESOURCE_MAPPING): + """Adds new resources 
to resource_mapping.
+
+    resources -> dict where key is sge resource name and value is a
+                 single name or a list of names to be used as aliased
+    """
+    for key, val in resources.items():
+        if key not in resource_mapping:
+            resource_mapping[key] = tuple()
+
+        # make sure the resource name itself is an alias
+        resource_mapping[key] += (key,)
+        if isinstance(val, list):
+            for alias in val:
+                if val != key:
+                    resource_mapping[key] += (alias,)
+        else:
+            if val != key:
+                resource_mapping[key] += (val,)
+
+
+def parse_jobscript():
+    """Minimal CLI to require/only accept single positional argument."""
+    p = argparse.ArgumentParser(description="SGE snakemake submit script")
+    p.add_argument("jobscript", help="Snakemake jobscript with job properties.")
+    return p.parse_args().jobscript
+
+
+def parse_qsub_defaults(parsed):
+    """Unpack QSUB_DEFAULTS."""
+    d = parsed.split() if type(parsed) == str else parsed
+
+    options = {}
+    for arg in d:
+        if "=" in arg:
+            k, v = arg.split("=")
+            options[k.strip("-")] = v.strip()
+        else:
+            options[arg.strip("-")] = ""
+    return options
+
+
+def format_job_properties(string):
+    # we use 'rulename' rather than 'rule' for consistency with the --cluster-config
+    # snakemake option
+    if job_properties["type"] == "group":
+        return string.format(rulename="snakejob", jobid=job_properties["jobid"])
+    return string.format(rulename=job_properties["rule"], jobid=job_properties["jobid"])
+
+
+def parse_qsub_settings(
+    source, resource_mapping=RESOURCE_MAPPING, option_mapping=OPTION_MAPPING
+):
+    job_options = {"options": {}, "resources": {}}
+
+    for skey, sval in source.items():
+        found = False
+        for rkey, rval in resource_mapping.items():
+            if skey in IGNORED_RESOURCES:
+                found = True
+                break
+            if skey in rval:
+                found = True
+                # Snakemake resources can only be defined as integers, but SGE interprets
+                # plain integers for memory as bytes. 
This hack means we interpret memory + # requests as megabytes which maps to the snakemake resources "mem_mb" + # and "disk_mb". + if (rkey == "s_vmem") or (rkey == "h_vmem"): + job_options["resources"].update({rkey: str(sval) + "M"}) + elif (rkey == "s_fsize") or (rkey == "h_fsize"): + job_options["resources"].update({rkey: str(sval) + "M"}) + else: + job_options["resources"].update({rkey: sval}) + break + if found: + continue + for okey, oval in option_mapping.items(): + if skey in oval: + found = True + job_options["options"].update({okey: sval}) + break + if not found: + raise KeyError(f"Unknown SGE option or resource: {skey}") + + return job_options + + +def load_cluster_config(path): + """Load config to dict either from absolute path or relative to profile dir.""" + if path: + path = os.path.join(os.path.dirname(__file__), os.path.expandvars(path)) + default_cluster_config = io.load_configfile(path) + else: + default_cluster_config = {} + if "__default__" not in default_cluster_config: + default_cluster_config["__default__"] = {} + return default_cluster_config + + +def ensure_directory_exists(path): + """Check if directory exists and create if not""" + directory = os.path.dirname(path) + if not os.path.exists(directory): + os.makedirs(directory, exist_ok=True) + return + + +def update_double_dict(outer, inner): + """Similar to dict.update() but does the update on nested dictionaries""" + for k, v in outer.items(): + outer[k].update(inner[k]) + + +def sge_option_string(key, val): + if val == "": + return f"-{key}" + if type(val) == bool: + return f"-{key} " + ("yes" if val else "no") + return format_job_properties(f"-{key} {val}") + + +def sge_resource_string(key, val): + if val == "": + return f"-l {key}" + if type(val) == bool: + return f"-{key}=" + ("true" if val else "false") + return f"-l {key}={val}" + + +def submit_job(jobscript, qsub_settings): + """Submit jobscript and return jobid.""" + + # remove any non-requestable resources which have somehow been 
added to + # the resource list + for resource in list(qsub_settings["resources"].keys()): + if resource in NONREQUESTABLE_RESOURCES: + del qsub_settings["resources"][resource] + + flatten = lambda l: [item for sublist in l for item in sublist] + batch_options = flatten( + [sge_option_string(k, v).split() for k, v in qsub_settings["options"].items()] + ) + batch_resources = flatten( + [ + sge_resource_string(k, v).split() + for k, v in qsub_settings["resources"].items() + ] + ) + try: + # -terse means only the jobid is returned rather than the normal 'Your job...' string + jobid = ( + subprocess.check_output( + ["qsub", "-terse"] + batch_options + batch_resources + [jobscript] + ) + .decode() + .rstrip() + ) + except subprocess.CalledProcessError as e: + raise e + except Exception as e: + raise e + return jobid + + +qsub_settings = {"options": {}, "resources": {}} + +jobscript = parse_jobscript() + +# get the job properties dictionary from snakemake +job_properties = read_job_properties(jobscript) + +# load the default cluster config +cluster_config = load_cluster_config(CLUSTER_CONFIG) + +if "__resources__" in cluster_config: + add_custom_resources(cluster_config["__resources__"]) + +# qsub default arguments +update_double_dict( + qsub_settings, parse_qsub_settings(parse_qsub_defaults(QSUB_DEFAULTS)) +) + +# cluster_config defaults +update_double_dict(qsub_settings, parse_qsub_settings(cluster_config["__default__"])) + +# resources defined in the snakemake file (note that these must be integer) +# we pass an empty dictionary for option_mapping because options should not be +# specified in the snakemake file +update_double_dict( + qsub_settings, + parse_qsub_settings(job_properties.get("resources", {}), option_mapping={}), +) + +# get any rule specific options/resources from the default cluster config +update_double_dict( + qsub_settings, + parse_qsub_settings(cluster_config.get(job_properties.get("rule"), {})), +) + +# get any options/resources specified through 
the --cluster-config command line argument +update_double_dict( + qsub_settings, parse_qsub_settings(job_properties.get("cluster", {})) +) + +# ensure qsub output dirs exist +for o in ("o", "e"): + ensure_directory_exists(qsub_settings["options"][o]) if o in qsub_settings[ + "options" + ] else None + +# submit job and echo id back to Snakemake (must be the only stdout) +print(submit_job(jobscript, qsub_settings)) From e0778f10785e486ee27d16a952544335f0bee8bd Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 21 Jan 2025 12:17:34 -0500 Subject: [PATCH 16/18] chore: make scripts executable --- config/eddie/sge-cancel.py | 0 config/eddie/sge-jobscript.sh | 0 config/eddie/sge-status.py | 0 config/eddie/submit_script.sh | 0 4 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 config/eddie/sge-cancel.py mode change 100644 => 100755 config/eddie/sge-jobscript.sh mode change 100644 => 100755 config/eddie/sge-status.py mode change 100644 => 100755 config/eddie/submit_script.sh diff --git a/config/eddie/sge-cancel.py b/config/eddie/sge-cancel.py old mode 100644 new mode 100755 diff --git a/config/eddie/sge-jobscript.sh b/config/eddie/sge-jobscript.sh old mode 100644 new mode 100755 diff --git a/config/eddie/sge-status.py b/config/eddie/sge-status.py old mode 100644 new mode 100755 diff --git a/config/eddie/submit_script.sh b/config/eddie/submit_script.sh old mode 100644 new mode 100755 From 1740bc2c86242873364e5763131d763701abd0f8 Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 21 Jan 2025 12:19:01 -0500 Subject: [PATCH 17/18] fix: use qsub -V & mem_mib --- config/eddie/add_mem_mib.py | 17 +++++++++++++++ config/eddie/cluster.yaml | 42 +++++++++++++++++++------------------ config/eddie/config.yaml | 4 ++-- 3 files changed, 41 insertions(+), 22 deletions(-) create mode 100644 config/eddie/add_mem_mib.py diff --git a/config/eddie/add_mem_mib.py b/config/eddie/add_mem_mib.py new file mode 100644 index 0000000..6a8f102 --- /dev/null +++ 
b/config/eddie/add_mem_mib.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python + +import ruamel.yaml + +yaml = ruamel.yaml.YAML() +yaml.preserve_quotes = True +yaml.explicit_start = True + +with open("cluster.yaml", "r") as infile: + data = yaml.load(infile) + +for k, v in data.items(): + if "mem" in v: + data[k]["mem_mib"] = int(v["mem"].rstrip("g")) * 1024 + +with open("cluster.yaml.2", "w") as outfile: + yaml.dump(data, outfile) diff --git a/config/eddie/cluster.yaml b/config/eddie/cluster.yaml index 995ad93..1351a3f 100644 --- a/config/eddie/cluster.yaml +++ b/config/eddie/cluster.yaml @@ -1,89 +1,91 @@ __default__: output: "logs/{rule}.{wildcards}.$JOB_ID.out" error: "logs/{rule}.{wildcards}.$JOB_ID.err" - mem: 40g + mem_mib: 40960 threads: 2 time: 4:00:00 name: "{rule}.{wildcards}" +test: + mem_mib: 10240 cutadapt: - mem: 120g + mem_mib: 122880 threads: 56 time: 6:00:00 dcc: - mem: 120g + mem_mib: 122880 threads: 4 time: 4:00:00 find_circ_align: - mem: 120g + mem_mib: 122880 threads: 56 time: 6:00:00 find_circ: - mem: 120g + mem_mib: 122880 threads: 56 time: 6:00:00 mapsplice: - mem: 200g + mem_mib: 204800 threads: 56 time: 48:00:00 mapsplice_postprocess: - mem: 120g + mem_mib: 122880 threads: 4 time: 4:00:00 nclscan: - mem: 512g + mem_mib: 524288 threads: 56 time: 4:00:00 partition: largemem fastqc: - mem: 40g + mem_mib: 40960 threads: 4 time: 4:00:00 ciri: - mem: 512g + mem_mib: 524288 threads: 56 time: 4:00:00 partition: largemem filter_ciri_bam_for_BSJs: - mem: 512g + mem_mib: 524288 threads: 4 time: 24:00:00 partition: largemem create_index: - mem: 200g + mem_mib: 204800 threads: 56 time: 12:00:00 star1p: - mem: 200g + mem_mib: 204800 threads: 56 time: 6:00:00 star2p: - mem: 200g + mem_mib: 204800 threads: 56 time: 6:00:00 star_circrnafinder: - mem: 200g + mem_mib: 204800 threads: 56 time: 6:00:00 estimate_duplication: - mem: 200g + mem_mib: 204800 threads: 4 time: 4:00:00 create_circExplorer_BSJ_bam: - mem: 120g + mem_mib: 122880 threads: 4 time: 4:00:00 
create_circExplorer_linear_spliced_bams: - mem: 120g + mem_mib: 122880 threads: 56 time: 8:00:00 clear: time: 1:00:00 split_splice_reads_BAM_create_BW: - mem: 120g + mem_mib: 122880 time: 24:00:00 split_linear_reads_BAM_create_BW: - mem: 120g + mem_mib: 122880 time: 24:00:00 alignment_stats: time: 1:00:00 diff --git a/config/eddie/config.yaml b/config/eddie/config.yaml index 7718c71..0cac0cb 100644 --- a/config/eddie/config.yaml +++ b/config/eddie/config.yaml @@ -1,7 +1,7 @@ cluster: qsub - -cwd + -terse -cwd -V -l h_rt={cluster.time} - -l h_vmem={cluster.mem} + -l h_vmem={cluster.mem_mib}M -pe sharedmem {cluster.threads} -N {cluster.name} -o {cluster.output} From 440cee977a3cd6c3d7df5ea8ab751dfa2f0bbb8f Mon Sep 17 00:00:00 2001 From: Kelly Sovacool Date: Tue, 21 Jan 2025 12:32:30 -0500 Subject: [PATCH 18/18] fix: set singularity cachedir & tmpdir in submission script for eddie, need to set cachedir to /exports/eddie/scratch/$USER/.singularity and set sing tmpdir to \$TMPDIR --- charlie | 11 +++++++---- config/biowulf/submit_script.sh | 2 +- config/eddie/submit_script.sh | 3 ++- config/fnlcr/submit_script.sh | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/charlie b/charlie index 08817cd..c36a096 100755 --- a/charlie +++ b/charlie @@ -437,7 +437,8 @@ function run() { fi preruncleanup - $EXPORT_SING_CACHE_DIR_CMD + export SINGULARITY_CACHEDIR=$SING_CACHE_DIR + export SINGULARITY_TMPDIR=$TMPDIR snakemake -s $SNAKEFILE\ --directory $WORKDIR \ @@ -466,7 +467,7 @@ function run() { preruncleanup cat ${WORKDIR}/config/${PLATFORM}/submit_script.sh |\ - envsubst '$CLUSTER_PROFILE $CONFIGFILE $EXPORT_SING_CACHE_DIR_CMD $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ + envsubst '$CLUSTER_PROFILE $CONFIGFILE $SING_CACHE_DIR $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ > ${WORKDIR}/submit_script.sh sbatch ${WORKDIR}/submit_script.sh @@ -474,7 +475,7 @@ function run() { preruncleanup cat 
${WORKDIR}/config/${PLATFORM}/submit_script.sh |\ - envsubst '$CLUSTER_PROFILE $CONFIGFILE $EXPORT_SING_CACHE_DIR_CMD $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ + envsubst '$CLUSTER_PROFILE $CONFIGFILE $SING_CACHE_DIR $MODULE_LOAD $PARTITION $SINGULARITY_BINDS $SNAKEFILE $trigger $WORKDIR' \ > ${WORKDIR}/run_script.sh pushd $WORKDIR && bash run_script.sh && popd @@ -593,12 +594,14 @@ function main(){ if [[ -z "$SING_CACHE_DIR" ]]; then if [[ -d "/data/$USER" ]]; then SING_CACHE_DIR="/data/$USER/.singularity" + elif [[ "$PLATFORM" == "eddie" ]]; then + SING_CACHE_DIR="/exports/eddie/scratch/$USER/.singularity" else SING_CACHE_DIR="${WORKDIR}/.singularity" fi + export SING_CACHE_DIR echo "singularity cache dir (--singcache) is not set, using ${SING_CACHE_DIR}" fi - export EXPORT_SING_CACHE_DIR_CMD="export SINGULARITY_CACHEDIR=\"${SING_CACHE_DIR}\"" # required files export CONFIGFILE="${WORKDIR}/config.yaml" diff --git a/config/biowulf/submit_script.sh b/config/biowulf/submit_script.sh index 926718b..8f7087d 100644 --- a/config/biowulf/submit_script.sh +++ b/config/biowulf/submit_script.sh @@ -8,7 +8,7 @@ cd $SLURM_SUBMIT_DIR $MODULE_LOAD -$EXPORT_SING_CACHE_DIR_CMD +export SINGULARITY_CACHEDIR=$SING_CACHE_DIR snakemake -s $SNAKEFILE \ --directory $WORKDIR \ diff --git a/config/eddie/submit_script.sh b/config/eddie/submit_script.sh index 52637a7..fa7617f 100755 --- a/config/eddie/submit_script.sh +++ b/config/eddie/submit_script.sh @@ -4,7 +4,8 @@ . 
/etc/profile.d/modules.sh $MODULE_LOAD -$EXPORT_SING_CACHE_DIR_CMD +export SINGULARITY_CACHEDIR=$SING_CACHE_DIR +export SINGULARITY_TMPDIR=$TMPDIR snakemake -s $SNAKEFILE \ --directory $WORKDIR \ diff --git a/config/fnlcr/submit_script.sh b/config/fnlcr/submit_script.sh index 926718b..8f7087d 100644 --- a/config/fnlcr/submit_script.sh +++ b/config/fnlcr/submit_script.sh @@ -8,7 +8,7 @@ cd $SLURM_SUBMIT_DIR $MODULE_LOAD -$EXPORT_SING_CACHE_DIR_CMD +export SINGULARITY_CACHEDIR=$SING_CACHE_DIR snakemake -s $SNAKEFILE \ --directory $WORKDIR \