From 29da1240b7b81d86a904fdc6c740ecb658fd4e61 Mon Sep 17 00:00:00 2001 From: William Fondrie Date: Wed, 26 Jun 2024 15:07:51 -0700 Subject: [PATCH] Added simple system test --- petasus/scripts/sage2lib.py | 6 +++++- tests/data/results.sage.csv | 2 ++ tests/system_tests/test_sage2lib.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 tests/data/results.sage.csv create mode 100644 tests/system_tests/test_sage2lib.py diff --git a/petasus/scripts/sage2lib.py b/petasus/scripts/sage2lib.py index 2eb6da6..816e167 100644 --- a/petasus/scripts/sage2lib.py +++ b/petasus/scripts/sage2lib.py @@ -24,7 +24,11 @@ def read_peptides(peptides, qvalue): polars.DataFrame The parsed and filtered peptides. """ - peptide_df = pl.read_csv(peptides, separator="\t") + try: + peptide_df = pl.read_parquet(peptides) + except pl.exceptions.ComputeError: + peptide_df = pl.read_csv(peptides, separator="\t") + peptide_df.columns = [c.lower() for c in peptide_df.columns] if "mokapot q-value" in peptide_df.columns: qval_col = "mokapot q-value" diff --git a/tests/data/results.sage.csv b/tests/data/results.sage.csv new file mode 100644 index 0000000..1c00cc9 --- /dev/null +++ b/tests/data/results.sage.csv @@ -0,0 +1,2 @@ +filename scannr peptide stripped_peptide proteins num_proteins rank is_decoy expmass calcmass charge peptide_len missed_cleavages isotope_error precursor_ppm fragment_ppm hyperscore delta_next delta_best rt aligned_rt predicted_rt delta_rt_model matched_peaks longest_b longest_y longest_y_pct matched_intensity_pct scored_candidates poisson sage_discriminant_score posterior_error spectrum_q peptide_q protein_q +LQSRPAAPPAPGPGQLTLR.mzML controllerType=0 controllerNumber=1 scan=30069 LQSRPAAPPAPGPGQLTLR LQSRPAAPPAPGPGQLTLR sp|Q99536|VAT1_HUMAN 1 1 false 1926.0815 1926.08 3 19 0 0.0 0.8239083 0.503857 72.265915 72.265915 0.0 108.2854 0.993444 0.0 0.993444 22 9 12 0.6315789 64.770966 1 -1.9562812 1.2944585 1.0 1.0 1.0 1.0 diff --git 
a/tests/system_tests/test_sage2lib.py b/tests/system_tests/test_sage2lib.py
new file mode 100644
index 0000000..f6ed9c1
--- /dev/null
+++ b/tests/system_tests/test_sage2lib.py
@@ -0,0 +1,54 @@
+"""Test building a dlib"""
+import sqlite3
+import subprocess
+from contextlib import closing
+
+import pytest
+
+
+# NOTE(review): the "parquet" case needs tests/data/results.sage.parquet to
+# exist next to results.sage.csv -- confirm that fixture is committed.
+@pytest.mark.parametrize("ftype", ["parquet", "csv"])
+def test_simpole_speclib_generation(data_path, tmp_path, ftype):
+    """Test spectral library generation.
+
+    Runs ``petasus sage2lib --qvalue 1`` on a PSM table and an mzML file,
+    then checks that the ``entries`` table of the resulting dlib has rows.
+
+    Parameters
+    ----------
+    data_path : pathlib.Path
+        Directory holding the test data (project fixture; presumably
+        ``tests/data`` -- confirm against conftest).
+    tmp_path : pathlib.Path
+        Per-test temporary directory provided by pytest.
+    ftype : str
+        Extension of the PSM table to use, "parquet" or "csv".
+    """
+    psms = data_path / f"results.sage.{ftype}"
+    mzml = data_path / "LQSRPAAPPAPGPGQLTLR.mzML"
+    outfile = tmp_path / "results.sage.dlib"
+    cmd = [
+        "petasus",
+        "sage2lib",
+        "--qvalue",
+        "1",
+        str(psms),
+        str(mzml),
+    ]
+
+    # NOTE(review): no output location or cwd is passed here, yet the dlib is
+    # expected in tmp_path -- confirm where sage2lib writes its output.
+    subprocess.run(cmd, check=True)
+
+    # sqlite3 silently creates a missing database file, so check for the
+    # output first to get a clear failure instead of "no such table".
+    assert outfile.is_file(), f"{outfile} was not created"
+
+    # closing() releases the connection even when the query fails; the
+    # connection's own context manager only handles transactions.
+    with closing(sqlite3.connect(str(outfile))) as con:
+        rows = con.execute("select PeptideModSeq from entries;").fetchall()
+
+    peptides = [row[0] for row in rows]
+    assert peptides, "the spectral library contains no entries"