Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions lib/kb_bedtools/kb_bedtoolsImpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
from installed_clients.ReadsUtilsClient import ReadsUtils
from base import Core

from kb_bedtools.utils import Intersection
from kb_bedtools.utils import BamConversion
from kb_bedtools.utils import BamConversion, Intersection

#END_HEADER

Expand Down
69 changes: 56 additions & 13 deletions lib/kb_bedtools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import logging
import os
import subprocess
import uuid
import shutil

from collections import Counter
from shutil import copyfile
Expand Down Expand Up @@ -45,7 +47,12 @@ def do_analysis(self, params: dict):
wsname = params['workspace_name']
sequencing_tech = 'Illumina'
interleaved = params['interleaved']
fastq_path = self.bam_to_fastq(staging_path, shared_folder=self.shared_folder)
if params.get('paired_end'):
fastq_path = self.bam_to_paired_fastq(staging_path, shared_folder=self.shared_folder)
else:
fastq_path = self.bam_to_fastq(staging_path, shared_folder=self.shared_folder)


reads_result = self.upload_reads(output_name, fastq_path, wsname, sequencing_tech, interleaved)


Expand All @@ -69,27 +76,63 @@ def do_analysis(self, params: dict):

@classmethod
def bam_to_fastq(cls, bam_file, shared_folder=""):
    """Convert a BAM file into a single FASTQ file with ``bedtools bamtofastq``.

    bedtools writes to a uniquely named scratch file in the current working
    directory. The result is copied into ``shared_folder`` under a unique
    name, so repeated conversions never overwrite one another, and the
    scratch file is removed afterwards.

    Args:
        bam_file: Path of the BAM file to convert.
        shared_folder: Directory that receives the FASTQ copy. With the
            default empty string the copy lands in the working directory.

    Returns:
        Path of the FASTQ file placed in ``shared_folder``.

    Raises:
        FileNotFoundError: ``bam_file`` is not a file, or bedtools did not
            create its output file.
        subprocess.CalledProcessError: bedtools exited with a non-zero status.
        ValueError: The generated FASTQ file is smaller than 100 bytes.
    """
    if not os.path.isfile(bam_file):
        raise FileNotFoundError(f"{bam_file} not found")

    unique_id = str(uuid.uuid4())[:8]
    temp_fastq = f"filename_end1_{unique_id}.fq"
    output_path = os.path.join(shared_folder, f"output_{unique_id}.fq")

    logging.warning(f'{">"*20}{os.getcwd()}')

    try:
        # check=True surfaces a failed conversion instead of silently
        # continuing with a missing or partial FASTQ file.
        subprocess.run(
            ['bedtools', 'bamtofastq', '-i', bam_file, '-fq', temp_fastq],
            check=True,
        )

        if not os.path.exists(temp_fastq):
            raise FileNotFoundError("bedtools did not create FASTQ file")

        if os.path.getsize(temp_fastq) < 100:
            raise ValueError("Generated FASTQ file is unexpectedly small — check input BAM or bedtools error")

        shutil.copyfile(temp_fastq, output_path)
    finally:
        # The scratch file lives in the working directory; remove it on both
        # the success and the failure path so conversions do not pile up.
        if os.path.exists(temp_fastq):
            os.remove(temp_fastq)

    return output_path


def upload_reads(self, name, reads_path, workspace_name, sequencing_tech, interleaved):
@classmethod
def bam_to_fastq_paired(cls, bam_file, shared_folder=""):
    """Convert a BAM file into two FASTQ files with ``bedtools bamtofastq``.

    bedtools is invoked with ``-fq`` and ``-fq2`` and writes to two uniquely
    named scratch files in the current working directory. Both are copied
    into ``shared_folder`` under unique names, and the scratch files are
    removed afterwards.

    Args:
        bam_file: Path of the BAM file to convert.
        shared_folder: Directory that receives the FASTQ copies. With the
            default empty string the copies land in the working directory.

    Returns:
        A dict with the keys ``"read1"`` and ``"read2"`` mapping to the
        paths of the two FASTQ files placed in ``shared_folder``.

    Raises:
        FileNotFoundError: ``bam_file`` is not a file, or bedtools did not
            create both output files.
        subprocess.CalledProcessError: bedtools exited with a non-zero status.
    """
    if not os.path.isfile(bam_file):
        raise FileNotFoundError(f"{bam_file} not found")

    unique_id = str(uuid.uuid4())[:8]
    read1_file = f"filename_end1_{unique_id}.fq"
    read2_file = f"filename_end2_{unique_id}.fq"
    output1 = os.path.join(shared_folder, f"read1_{unique_id}.fq")
    output2 = os.path.join(shared_folder, f"read2_{unique_id}.fq")

    logging.warning(f"{'>'*20}{os.getcwd()}")

    try:
        # check=True surfaces a failed conversion instead of silently
        # continuing with missing or partial FASTQ files.
        subprocess.run(
            [
                'bedtools', 'bamtofastq',
                '-i', bam_file,
                '-fq', read1_file,
                '-fq2', read2_file,
            ],
            check=True,
        )

        if not (os.path.exists(read1_file) and os.path.exists(read2_file)):
            raise FileNotFoundError("Paired-end FASTQ files were not created")

        shutil.copyfile(read1_file, output1)
        shutil.copyfile(read2_file, output2)
    finally:
        # Scratch files live in the working directory; remove them on both
        # the success and the failure path.
        for scratch in (read1_file, read2_file):
            if os.path.exists(scratch):
                os.remove(scratch)

    return {"read1": output1, "read2": output2}


# The paired-end branch of do_analysis looks this converter up as
# ``bam_to_paired_fastq``; keep that spelling resolvable.
# NOTE(review): the caller passes the result on as a reads path, but this
# returns a dict of two paths — confirm the caller handles that.
bam_to_paired_fastq = bam_to_fastq_paired

def upload_reads(self, name, reads_path, workspace_name,
sequencing_tech=None, interleaved=None):
"""
Upload reads back to the KBase Workspace. This method only uses the
minimal parameters necessary to provide a demonstration. There are many
Expand Down
1 change: 1 addition & 0 deletions test/kb_bedtools_server_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ def test_intersect(self):
"output_name": "intersectOutput",
})

def test_bamtofastq(self):
params = {
"workspace_name": self.wsName,
"reads_ref": "70257/2/1",
Expand Down
Binary file modified test/minimal.bam
Binary file not shown.
71 changes: 70 additions & 1 deletion test/unit_tests/test_kb_bedtools_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import logging
import subprocess

import os
import pytest
from kb_bedtools.utils import BamConversion, Intersection


@pytest.fixture
Expand All @@ -15,3 +16,71 @@ def process():
def test_process(process):
    """Smoke test: request the ``process`` fixture, log a line, assert a tautology."""
    logging.info("Running a test")
    assert True

def test_bam_to_fastq_creates_output(tmp_path):
    """Converting ``minimal.bam`` yields an existing file that starts with '@'."""
    here = os.path.dirname(__file__)
    source_bam = os.path.join(here, "..", "minimal.bam")

    target_dir = tmp_path / "output"
    target_dir.mkdir()

    fastq = BamConversion.bam_to_fastq(source_bam, str(target_dir))

    assert os.path.exists(fastq), "FASTQ output file was not created"
    with open(fastq) as handle:
        text = handle.read()
    assert text.startswith("@"), "FASTQ output does not appear valid"

def test_bam_to_fastq_invalid_input_raises():
    """A BAM path that does not exist is rejected with ``FileNotFoundError``."""
    # Pin the specific exception: a bare ``Exception`` would also accept
    # unrelated failures such as a NameError or a missing bedtools binary.
    with pytest.raises(FileNotFoundError):
        BamConversion.bam_to_fastq("/invalid/path/to/file.bam")

def test_bam_to_fastq_empty_file_raises(tmp_path):
    """Converting a zero-byte BAM file must raise."""
    blank_bam = tmp_path / "empty.bam"
    blank_bam.touch()
    source, destination = str(blank_bam), str(tmp_path)

    with pytest.raises(Exception):
        BamConversion.bam_to_fastq(source, destination)


def test_bam_to_fastq_multiple_outputs(tmp_path):
    """Two conversions of the same BAM return two different output paths."""
    source_bam = os.path.join(os.path.dirname(__file__), "..", "minimal.bam")

    first, second = (
        BamConversion.bam_to_fastq(source_bam, tmp_path) for _ in range(2)
    )

    assert first != second, "Multiple conversions should create unique output files"

def test_bam_to_fastq_paired_end_output(tmp_path):
    """Paired conversion returns a dict of two existing FASTQ paths.

    A non-empty output file must begin with '@'; an empty one only prints a
    warning. The produced files are removed again at the end.
    """
    source_bam = os.path.join(os.path.dirname(__file__), "..", "minimal.bam")
    result_paths = BamConversion.bam_to_fastq_paired(
        bam_file=source_bam,
        shared_folder=str(tmp_path),
    )

    try:
        assert isinstance(result_paths, dict), "Output should be a dictionary"
        assert "read1" in result_paths and "read2" in result_paths, "Missing paired-end keys"
        assert os.path.exists(result_paths["read1"]), "FASTQ for read 1 not created"
        assert os.path.exists(result_paths["read2"]), "FASTQ for read 2 not created"

        # Only the first character is needed to recognise a FASTQ header.
        with open(result_paths["read1"]) as f1, open(result_paths["read2"]) as f2:
            lead1 = f1.read(1)
            lead2 = f2.read(1)

        checks = (
            (lead1, "Warning: Read 1 FASTQ file is empty", "Read 1 output is not a valid FASTQ file"),
            (lead2, "Warning: Read 2 FASTQ file is empty", "Read 2 output is not a valid FASTQ file"),
        )
        for lead, empty_warning, invalid_message in checks:
            if not lead:
                print(empty_warning)
                continue
            assert lead == "@", invalid_message
    finally:
        for leftover in filter(os.path.exists, result_paths.values()):
            os.remove(leftover)
3 changes: 2 additions & 1 deletion ui/narrative/methods/run_kb_bedtools/display.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,6 @@ parameters :
long-hint : |
Select if reads file is interleaved


description : |
<p> Takes a BAM file and returns a FASTQ file.</p>
<p> Takes a BAM file and returns a FASTQ file.</p>