Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
Change Log
==========

3.16.0
======
* Add CLI command to get an overview of Somalier results in SMaHT.


3.15.0
======
* Fix to manage fixed shard dependencies on a single non scatter/gather/sharded step
Expand Down
17 changes: 17 additions & 0 deletions magma_smaht/commands/wrangler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,23 @@ def sample_identity_check_status(num_files, auth_env):
wrangler_utils.sample_identity_check_status(num_files, smaht_key)


@cli.command()
@click.help_option("--help", "-h")
@click.option(
    "-e", "--auth-env",
    type=str,
    required=True,
    help="Name of environment in smaht-keys file",
)
def sample_identity_check_results(auth_env):
    """
    Check sample identity results for each donor.
    """
    # NOTE: the docstring above is kept verbatim because click displays it as
    # the help text of this command.
    # Look up the auth key for the named environment and pass it directly to
    # the wrangler-level implementation, which prints the per-donor results.
    wrangler_utils.sample_identity_check_results(get_auth_key(auth_env))


@cli.command()
@click.help_option("--help", "-h")
@click.option(
Expand Down
31 changes: 31 additions & 0 deletions magma_smaht/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,3 +586,34 @@ def get_item_es(identifier, key, frame="raw"):
def _serialize_key(key_dict):
"""Convert dictionary key to a hashable string for caching."""
return json.dumps(key_dict, sort_keys=True)

class bcolors:
    """ANSI escape sequences used to style text printed to a terminal."""

    # Reset sequence: terminates any styling started by the codes below.
    ENDC = "\033[0m"

    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

    # Foreground colors, named after the kind of message they highlight.
    FAIL = "\033[91m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    OKBLUE = "\033[94m"
    HEADER = "\033[95m"
    OKCYAN = "\033[96m"


def ok_blue_text(text: str) -> str:
    """Return ``text`` wrapped in ``bcolors.OKBLUE`` and the ``bcolors.ENDC`` reset."""
    start, reset = bcolors.OKBLUE, bcolors.ENDC
    return f"{start}{text}{reset}"


def ok_green_text(text: str) -> str:
    """Return ``text`` wrapped in ``bcolors.OKGREEN`` and the ``bcolors.ENDC`` reset."""
    start, reset = bcolors.OKGREEN, bcolors.ENDC
    return f"{start}{text}{reset}"


def bold_text(text: str) -> str:
    """Return ``text`` wrapped in ``bcolors.BOLD`` and the ``bcolors.ENDC`` reset."""
    start, reset = bcolors.BOLD, bcolors.ENDC
    return f"{start}{text}{reset}"


def warning_text(text: str) -> str:
    """Return ``text`` wrapped in ``bcolors.WARNING`` and the ``bcolors.ENDC`` reset."""
    start, reset = bcolors.WARNING, bcolors.ENDC
    return f"{start}{text}{reset}"


def fail_text(text: str) -> str:
    """Return ``text`` wrapped in ``bcolors.FAIL`` and the ``bcolors.ENDC`` reset."""
    start, reset = bcolors.FAIL, bcolors.ENDC
    return f"{start}{text}{reset}"
32 changes: 28 additions & 4 deletions magma_smaht/wrangler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@
get_file_set,
get_donors_from_mwfr,
get_item,
get_item_es,
get_tag_for_sample_identity_check,
get_wfr_from_mwfr,
get_latest_somalier_run_for_donor,
generate_input_structure,
mwfr_from_input,
get_all_donors
get_all_donors,
ok_green_text,
fail_text,
)

from magma_smaht.constants import (
Expand Down Expand Up @@ -158,7 +161,7 @@ def reset_mwfrs(mwfr_uuids: list, smaht_key: dict):
def reset_all_failed_mwfrs(smaht_key: dict, ignore_md5 : bool):

url = (
"/search/?final_status=failed&type=MetaWorkflowRun"
"/search/?final_status=failed&type=MetaWorkflowRun&limit=100"
if not ignore_md5
else "/search/?final_status=failed&type=MetaWorkflowRun&meta_workflow.name%21=md5"
)
Expand Down Expand Up @@ -443,7 +446,7 @@ def sample_identity_check_status(num_files: int, smaht_key: dict):
f"&limit={num_files}"
"&sort=date_created"
"&meta_workflow_run_inputs.meta_workflow.name%21=sample_identity_check"
"&description%21=Annotated FLNC output BAM" # Exclude Kinnex FLNC BAMs as they don't show high relatedness values
"&description%21=FLNC (full-length, non-chimeric cDNA reads) aligned BAM" # Exclude Kinnex FLNC BAMs as they don't show high relatedness values
)
output_files = ff_utils.search_metadata(f"/search/{search_filter}", key=smaht_key)

Expand Down Expand Up @@ -481,7 +484,7 @@ def sample_identity_check_status(num_files: int, smaht_key: dict):
)
continue
mwfr = mwfrs[0]
mwfr = get_item(mwfr[UUID], smaht_key, frame="embedded")
mwfr = get_item_es(mwfr[UUID], smaht_key, frame="embedded")

# Only consider files that are outputs for alignment workflows or bam2cram conversions
if "Alignment" not in mwfr["meta_workflow"]["category"] and mwfr["meta_workflow"]["name"] != "bam_to_cram":
Expand Down Expand Up @@ -541,6 +544,27 @@ def sample_identity_check_status(num_files: int, smaht_key: dict):
)


def sample_identity_check_results(smaht_key: dict):
    """Print the latest sample identity check QC result for each donor.

    Donors are fetched with ``get_all_donors`` and reported in order of their
    ``external_id`` (donors without one sort first). A donor is skipped when
    ``get_latest_somalier_run_for_donor`` returns nothing for it. For every
    other donor one line is printed with the donor's display title, the
    overall QC status of the ``somalier_relate`` output (green for ``Pass``,
    red otherwise) and the accession of the run the status was read from.

    If the QC status cannot be read from the run (no workflow run, output
    file or quality metrics found), the donor is reported as
    "QC result not available" instead of aborting the whole overview.

    Args:
        smaht_key: Auth key dictionary passed through to the lookup helpers.
    """
    donors = get_all_donors("object", smaht_key)
    for donor in sorted(donors, key=lambda d: d.get("external_id", "")):
        donor_display_title = donor["display_title"]
        latest_runs = get_latest_somalier_run_for_donor(donor[ACCESSION], smaht_key)
        if not latest_runs:
            continue

        latest_run = latest_runs[0]
        somalier_relate = get_wfr_from_mwfr(latest_run, "somalier_relate", 0)

        # Only the nested lookup is guarded: a missing or empty level (or a
        # None workflow run) must not stop the report for remaining donors.
        try:
            qc_status = somalier_relate["output"][0]["file"]["quality_metrics"][0][
                "overall_quality_status"
            ]
        except (KeyError, IndexError, TypeError):
            qc_status = None

        if qc_status is None:
            qc_text = fail_text("QC result not available")
        elif qc_status == "Pass":
            qc_text = ok_green_text(qc_status)
        else:
            qc_text = fail_text(qc_status)

        print(f"{donor_display_title}: {qc_text} (MWFR: {latest_run[ACCESSION]})")


def purge_fileset(
fileset_accession: str,
dry_run: bool,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "magma-suite"
version = "3.15.0"
version = "3.16.0"
description = "Collection of tools to manage meta-workflows automation."
authors = ["Michele Berselli <berselli.michele@gmail.com>", "Doug Rioux", "Soo Lee", "CGAP team"]
license = "MIT"
Expand Down