bedapub · grexor · Mar 20, 2025 · Feb 20, 2025 · Feb 20, 2025 · Feb 20, 2025
diff --git a/Snakefile b/Snakefile
@@ -6,11 +6,10 @@ DEFAULT_CORES = config["defaults"]["cores"]
 DEFAULT_MEM = config["defaults"]["mem"]
 DEFAULT_TIME = config["defaults"]["time"]
 
-alignIntronMax_text = f"--alignIntronMax {config['mapping']['alignIntronMax']}" if config["mapping"]["alignIntronMax"] is not None else ""
-
 # parameters for featureCounts based on config
 db_library_type_insert = {"single-end":"", "paired-end":"-p "}
 db_library_strand_insert = {"FIRST_READ_TRANSCRIPTION_STRAND":1, "SINGLE_STRAND":1, "SINGLE_REVERSE":1, "SECOND_READ_TRANSCRIPTION_STRAND":2, "NONE":0}
+alignIntronMax_text = f"--alignIntronMax {config['mapping']['alignIntronMax']}" if config["mapping"]["alignIntronMax"] not in (None, "None") else ""  # YAML `None` is loaded as the string "None" (only null/~ become Python None), so treat both as "not set"
 
 if not os.path.exists("results"):
     splicekit.setup()
@@ -20,8 +19,6 @@ if not os.path.exists("annotation/comparisons.tab"):
 
 splicekit_folder = os.path.dirname(splicekit.__file__)
 
-container: "docker://ghcr.io/bedapub/splicekit:main"
-
 samples_df = pd.read_csv("samples.tab", sep="\t", comment="#")
 SAMPLES = samples_df["sample_id"].tolist()
 
@@ -47,12 +44,6 @@ input_files = [
             # annotation
             "annotation/comparisons.tab",
 
-            # bam files
-            *expand("bam/{sample}.bam", sample=SAMPLES),
-
-            # bai files
-            *expand("bam/{sample}.bam.bai", sample=SAMPLES),
-
             # bw files
             *expand("results/results_jbrowse/{sample}.bw", sample=SAMPLES),
 
@@ -105,10 +96,17 @@ input_files = [
             "report/index.html",  # report
 ]
 
+# mapping FASTQ -> bam?
+if config["mapping"]["perform_mapping"]:
+    for sample_id in SAMPLES:
+        # request both the alignment and its index for every sample
+        input_files += [f"bam/{sample_id}.bam", f"bam/{sample_id}.bam.bai"]
+
 # process scanRBP
 if splicekit.config.scanRBP:
     input_files.append("results/motifs/scanRBP/scanRBP.done")
 
+
 rule all:
     input:
         input_files
@@ -178,9 +176,9 @@ rule bam_bw_cram:
         "logs/bam_bw_cram/{sample}.log",
     shell:
         f"""
-        bamCoverage --binSize 5 -b bam/{{wildcards.sample}}.bam -o results/results_jbrowse/{{wildcards.sample}}.bw -of bigwig
-        samtools view -C -T {splicekit.config.fasta_path} bam/{{wildcards.sample}}.bam -O CRAM -o results/results_jbrowse/{{wildcards.sample}}.cram
-        samtools index results/results_jbrowse/{{wildcards.sample}}.cram
+        bamCoverage --numberOfProcessors max --binSize 5 -b bam/{{wildcards.sample}}.bam -o results/results_jbrowse/{{wildcards.sample}}.bw -of bigwig
+        samtools view -@ {{resources.cores}} -C -T {splicekit.config.fasta_path} bam/{{wildcards.sample}}.bam -O CRAM -o results/results_jbrowse/{{wildcards.sample}}.cram
+        samtools index -@ {{resources.cores}} results/results_jbrowse/{{wildcards.sample}}.cram
         """
 
 rule junctions:

diff --git a/config.yaml b/config.yaml
@@ -1,5 +1,6 @@
 mapping:
-  alignIntronMax: None # leave at None or set a number [nt], e.g.: 5000
+  perform_mapping: True  # should splicekit perform mapping of reads from FASTQ to the reference genome with pybio? (True=yes, False=no)
+  alignIntronMax: None   # leave at None or set a number [nt], e.g.: 5000
 
 defaults:
   cores: 1
@@ -8,27 +9,27 @@ defaults:
 
 map_fastq_single:
   cores: 8
-  mem: 40           # GB
+  mem: 8            # GB
   time: "02:00:00"  # hours
 
 map_fastq_paired:
   cores: 8
-  mem: 40           # GB
+  mem: 8            # GB
   time: "02:00:00"  # hours
 
 bam_index:
   cores: 8
-  mem: 8            # GB
+  mem: 2            # GB
   time: "02:00:00"  # hours
 
 bam_bw:
   cores: 8
-  mem: 16           # GB
+  mem: 2            # GB
   time: "02:00:00"  # hours
 
 feature_counts:
   cores: 8
-  mem: 4            # GB
+  mem: 2            # GB
   time: "01:00:00"  # hours
 
 edgeR:

diff --git a/splicekit/core/jbrowse2.py b/splicekit/core/jbrowse2.py
@@ -15,11 +15,12 @@ def start():
     server()
 
 def server():
-    hostname=socket.gethostname()
-    ip_addr=socket.gethostbyname(hostname)
+    hostname=config.hostname  # NOTE(review): assigned but not read again inside server()
+    ip_addr=config.ip_addr  # address shown in the URLs printed below
     Handler = RangeHTTPServer.RangeRequestHandler
     socketserver.TCPServer.allow_reuse_address = True
     with socketserver.TCPServer(("", config.jbrowse2_port), Handler) as httpd:
+        print(f"{module_desc} html report at http://{ip_addr}:{config.jbrowse2_port}/report")
         print(f"{module_desc} http://{ip_addr}:{config.jbrowse2_port}/jbrowse2/?config=splicekit_data/config.json")
         httpd.serve_forever()
 

diff --git a/splicekit/core/motifs.py b/splicekit/core/motifs.py
@@ -760,4 +760,4 @@ def process():
     make_logos()
     dreme()
     make_distance()
-    cluster()
+    cluster()