From 97b46fed9f7363bd4635d229606ce831982a15bf Mon Sep 17 00:00:00 2001 From: Auke Heerdink <21688542+aukeheerdink@users.noreply.github.com> Date: Tue, 11 Feb 2025 11:56:10 +0100 Subject: [PATCH 1/7] Added pka to aminoacid --- rustyms/src/aminoacid_properties.rs | 106 ++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/rustyms/src/aminoacid_properties.rs b/rustyms/src/aminoacid_properties.rs index 98d8ad74..5ab8eab5 100644 --- a/rustyms/src/aminoacid_properties.rs +++ b/rustyms/src/aminoacid_properties.rs @@ -10,6 +10,8 @@ use serde::{Deserialize, Serialize}; +use crate::AminoAcid; + /// All amino acid property classes according to IMGT. /// > IMGT standardized criteria for statistical analysis of immunoglobulin V-REGION amino acid properties /// > @@ -206,6 +208,23 @@ impl crate::AminoAcid { } } } + + pub fn pka(&self) -> pKa { + self.pka_with_source(pksources::Lide1991) + } + + pub fn pka_with_source(self, source: pksources) -> pKa { + let table = match source { + pksources::Lide1991 => PKA_LIDE1991, + pksources::Lehninger => PKA_LEHNINGER, + }; + + table + .iter() + .find(|&&(amino_acid, _)| amino_acid == self) + .map(|&(_, pka)| pka) + .expect("Peptide not found in the specified table") + } } #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)] @@ -277,3 +296,90 @@ pub enum HydrogenBondClass { None, Unknown, } + +#[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Serialize, Deserialize)] +pub struct pKa { + carboxyl: f32, + ammonium: f32, + sidechain: Option, +} + +impl pKa { + const fn from(carboxyl: f32, ammonium: f32, sidechain: Option) -> Self { + Self { + carboxyl, + ammonium, + sidechain, + } + } + pub const fn carboxyl(self) -> f32 { + self.carboxyl + } + pub const fn ammonium(self) -> f32 { + self.ammonium + } + pub const fn sidechain(self) -> Option { + self.sidechain + } +} + +pub enum pksources { + Lide1991, + Lehninger, +} + +// pKa values from Lide, D. R. (1991). 
Handbook of Chemistry and Physics: A Ready Reference Book of Chemical and Physical Data. +const PKA_LIDE1991: &[(AminoAcid, pKa)] = &[ + (AminoAcid::Arginine, pKa::from(2.03, 9.00, Some(12.10))), + (AminoAcid::Histidine, pKa::from(1.70, 9.09, Some(6.04))), + (AminoAcid::Lysine, pKa::from(2.15, 9.16, Some(10.67))), + (AminoAcid::AsparticAcid, pKa::from(1.95, 9.66, Some(3.71))), + (AminoAcid::GlutamicAcid, pKa::from(2.16, 9.58, Some(4.15))), + (AminoAcid::Tyrosine, pKa::from(2.24, 9.04, Some(10.10))), + (AminoAcid::Cysteine, pKa::from(1.91, 10.28, Some(8.14))), + (AminoAcid::Alanine, pKa::from(2.33, 9.71, None)), + (AminoAcid::Glycine, pKa::from(2.34, 9.58, None)), + (AminoAcid::Proline, pKa::from(1.95, 10.47, None)), + (AminoAcid::Serine, pKa::from(2.13, 9.05, None)), + (AminoAcid::Threonine, pKa::from(2.20, 8.96, None)), + (AminoAcid::Methionine, pKa::from(2.16, 9.08, None)), + (AminoAcid::Phenylalanine, pKa::from(2.18, 9.09, None)), + (AminoAcid::Tryptophan, pKa::from(2.38, 9.34, None)), + (AminoAcid::Valine, pKa::from(2.27, 9.52, None)), + (AminoAcid::Isoleucine, pKa::from(2.26, 9.60, None)), + (AminoAcid::Leucine, pKa::from(2.32, 9.58, None)), + (AminoAcid::Glutamine, pKa::from(2.18, 9.00, None)), + (AminoAcid::Asparagine, pKa::from(2.16, 8.73, None)), + (AminoAcid::AmbiguousAsparagine, pKa::from(2.16, 8.73, None)), + (AminoAcid::AmbiguousGlutamine, pKa::from(2.18, 9.00, None)), + // (AminoAcid::Pyrrolysine, todo!()), + // (AminoAcid::Unknown, todo!()), +]; + +// pKa values from Lehninger, A. L., Nelson, D. L., & Cox, M. M. (2005). Lehninger Principles of Biochemistry. Macmillan. 
+const PKA_LEHNINGER: &[(AminoAcid, pKa)] = &[ + (AminoAcid::Arginine, pKa::from(2.17, 9.04, Some(12.48))), + (AminoAcid::Histidine, pKa::from(1.82, 9.17, Some(6.00))), + (AminoAcid::Lysine, pKa::from(2.18, 8.95, Some(10.53))), + (AminoAcid::AsparticAcid, pKa::from(1.88, 9.60, Some(3.65))), + (AminoAcid::GlutamicAcid, pKa::from(2.19, 9.67, Some(4.25))), + (AminoAcid::Tyrosine, pKa::from(2.20, 9.11, Some(10.07))), + (AminoAcid::Cysteine, pKa::from(1.96, 10.28, Some(8.18))), + (AminoAcid::Alanine, pKa::from(2.34, 9.69, None)), + (AminoAcid::Glycine, pKa::from(2.34, 9.60, None)), + (AminoAcid::Proline, pKa::from(1.99, 10.96, None)), + (AminoAcid::Serine, pKa::from(2.21, 9.15, None)), + (AminoAcid::Threonine, pKa::from(2.11, 9.62, None)), + (AminoAcid::Methionine, pKa::from(2.28, 9.21, None)), + (AminoAcid::Phenylalanine, pKa::from(1.83, 9.13, None)), + (AminoAcid::Tryptophan, pKa::from(2.38, 9.39, None)), + (AminoAcid::Valine, pKa::from(2.32, 9.62, None)), + (AminoAcid::Isoleucine, pKa::from(2.36, 9.68, None)), + (AminoAcid::Leucine, pKa::from(2.36, 9.60, None)), + (AminoAcid::Glutamine, pKa::from(2.17, 9.13, None)), + (AminoAcid::Asparagine, pKa::from(2.02, 8.80, None)), + (AminoAcid::AmbiguousAsparagine, pKa::from(2.02, 8.80, None)), + (AminoAcid::AmbiguousGlutamine, pKa::from(2.17, 9.13, None)), + // (AminoAcid::Pyrrolysine, todo!()), + // (AminoAcid::Unknown, todo!()), +]; From 0da256e9a7bc85abcc5c7f0f408aa5124cddfd3a Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Tue, 11 Feb 2025 14:26:58 +0100 Subject: [PATCH 2/7] Restructure into trait definition --- rustyms/src/aminoacid_properties.rs | 175 +++++++++++++++------------- 1 file changed, 92 insertions(+), 83 deletions(-) diff --git a/rustyms/src/aminoacid_properties.rs b/rustyms/src/aminoacid_properties.rs index 5ab8eab5..c7f05bba 100644 --- a/rustyms/src/aminoacid_properties.rs +++ b/rustyms/src/aminoacid_properties.rs @@ -10,7 +10,10 @@ use serde::{Deserialize, Serialize}; -use crate::AminoAcid; +use 
crate::{ + aminoacids::IsAminoAcid, modification::SimpleModification, AminoAcid, Peptidoform, + SemiAmbiguous, +}; /// All amino acid property classes according to IMGT. /// > IMGT standardized criteria for statistical analysis of immunoglobulin V-REGION amino acid properties @@ -208,23 +211,6 @@ impl crate::AminoAcid { } } } - - pub fn pka(&self) -> pKa { - self.pka_with_source(pksources::Lide1991) - } - - pub fn pka_with_source(self, source: pksources) -> pKa { - let table = match source { - pksources::Lide1991 => PKA_LIDE1991, - pksources::Lehninger => PKA_LEHNINGER, - }; - - table - .iter() - .find(|&&(amino_acid, _)| amino_acid == self) - .map(|&(_, pka)| pka) - .expect("Peptide not found in the specified table") - } } #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)] @@ -298,88 +284,111 @@ pub enum HydrogenBondClass { } #[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Serialize, Deserialize)] -pub struct pKa { - carboxyl: f32, +pub struct AminoAcidpKa { ammonium: f32, sidechain: Option, + carboxyl: f32, } -impl pKa { - const fn from(carboxyl: f32, ammonium: f32, sidechain: Option) -> Self { +/// A source for pKa values, which can be used to calculate the pKa for peptidoforms. +pub trait pKaSource { + /// Get the pKa values for the given amino acid and modifications. + fn pKa(amino_acid: AA, modifications: &[SimpleModification]) -> Option; + + /// Get the calculated pKa value for the given peptidoform, or None if any of the sequence elements do not have a defined pKa. 
+ fn peptide_pKa(peptidoform: Peptidoform) -> Option { + todo!() + } +} + +/// The pKa values for an amino acid +impl AminoAcidpKa { + const fn new(ammonium: f32, sidechain: Option, carboxyl: f32) -> Self { Self { - carboxyl, ammonium, sidechain, + carboxyl, } } - pub const fn carboxyl(self) -> f32 { - self.carboxyl - } pub const fn ammonium(self) -> f32 { self.ammonium } pub const fn sidechain(self) -> Option { self.sidechain } + pub const fn carboxyl(self) -> f32 { + self.carboxyl + } } -pub enum pksources { - Lide1991, - Lehninger, +/// pKa values from Lide, D. R. (1991). Handbook of Chemistry and Physics: A Ready Reference Book of Chemical and Physical Data. +pub struct pKaLide1991; + +impl pKaSource for pKaLide1991 { + fn pKa(amino_acid: AminoAcid, modifications: &[SimpleModification]) -> Option { + if !modifications.is_empty() { + return None; + } + match amino_acid { + AminoAcid::Arginine => Some(AminoAcidpKa::new(9.00, Some(12.10), 2.03)), + AminoAcid::Histidine => Some(AminoAcidpKa::new(9.09, Some(6.04), 1.70)), + AminoAcid::Lysine => Some(AminoAcidpKa::new(9.16, Some(10.67), 2.15)), + AminoAcid::AsparticAcid => Some(AminoAcidpKa::new(9.66, Some(3.71), 1.95)), + AminoAcid::GlutamicAcid => Some(AminoAcidpKa::new(9.58, Some(4.15), 2.16)), + AminoAcid::Tyrosine => Some(AminoAcidpKa::new(9.04, Some(10.10), 2.24)), + AminoAcid::Cysteine => Some(AminoAcidpKa::new(10.28, Some(8.14), 1.91)), + AminoAcid::Alanine => Some(AminoAcidpKa::new(9.71, None, 2.33)), + AminoAcid::Glycine => Some(AminoAcidpKa::new(9.58, None, 2.34)), + AminoAcid::Proline => Some(AminoAcidpKa::new(10.47, None, 1.95)), + AminoAcid::Serine => Some(AminoAcidpKa::new(9.05, None, 2.13)), + AminoAcid::Threonine => Some(AminoAcidpKa::new(8.96, None, 2.20)), + AminoAcid::Methionine => Some(AminoAcidpKa::new(9.08, None, 2.16)), + AminoAcid::Phenylalanine => Some(AminoAcidpKa::new(9.09, None, 2.18)), + AminoAcid::Tryptophan => Some(AminoAcidpKa::new(9.34, None, 2.38)), + AminoAcid::Valine => 
Some(AminoAcidpKa::new(9.52, None, 2.27)), + AminoAcid::Isoleucine => Some(AminoAcidpKa::new(9.60, None, 2.26)), + AminoAcid::Leucine => Some(AminoAcidpKa::new(9.58, None, 2.32)), + AminoAcid::Glutamine => Some(AminoAcidpKa::new(9.00, None, 2.18)), + AminoAcid::Asparagine => Some(AminoAcidpKa::new(8.73, None, 2.16)), + AminoAcid::AmbiguousAsparagine => Some(AminoAcidpKa::new(8.73, None, 2.16)), + AminoAcid::AmbiguousGlutamine => Some(AminoAcidpKa::new(9.00, None, 2.18)), + _ => None, + } + } } -// pKa values from Lide, D. R. (1991). Handbook of Chemistry and Physics: A Ready Reference Book of Chemical and Physical Data. -const PKA_LIDE1991: &[(AminoAcid, pKa)] = &[ - (AminoAcid::Arginine, pKa::from(2.03, 9.00, Some(12.10))), - (AminoAcid::Histidine, pKa::from(1.70, 9.09, Some(6.04))), - (AminoAcid::Lysine, pKa::from(2.15, 9.16, Some(10.67))), - (AminoAcid::AsparticAcid, pKa::from(1.95, 9.66, Some(3.71))), - (AminoAcid::GlutamicAcid, pKa::from(2.16, 9.58, Some(4.15))), - (AminoAcid::Tyrosine, pKa::from(2.24, 9.04, Some(10.10))), - (AminoAcid::Cysteine, pKa::from(1.91, 10.28, Some(8.14))), - (AminoAcid::Alanine, pKa::from(2.33, 9.71, None)), - (AminoAcid::Glycine, pKa::from(2.34, 9.58, None)), - (AminoAcid::Proline, pKa::from(1.95, 10.47, None)), - (AminoAcid::Serine, pKa::from(2.13, 9.05, None)), - (AminoAcid::Threonine, pKa::from(2.20, 8.96, None)), - (AminoAcid::Methionine, pKa::from(2.16, 9.08, None)), - (AminoAcid::Phenylalanine, pKa::from(2.18, 9.09, None)), - (AminoAcid::Tryptophan, pKa::from(2.38, 9.34, None)), - (AminoAcid::Valine, pKa::from(2.27, 9.52, None)), - (AminoAcid::Isoleucine, pKa::from(2.26, 9.60, None)), - (AminoAcid::Leucine, pKa::from(2.32, 9.58, None)), - (AminoAcid::Glutamine, pKa::from(2.18, 9.00, None)), - (AminoAcid::Asparagine, pKa::from(2.16, 8.73, None)), - (AminoAcid::AmbiguousAsparagine, pKa::from(2.16, 8.73, None)), - (AminoAcid::AmbiguousGlutamine, pKa::from(2.18, 9.00, None)), - // (AminoAcid::Pyrrolysine, todo!()), - // 
(AminoAcid::Unknown, todo!()), -]; +/// pKa values from Lehninger, A. L., Nelson, D. L., & Cox, M. M. (2005). Lehninger Principles of Biochemistry. Macmillan. +pub struct pKaLehninger; -// pKa values from Lehninger, A. L., Nelson, D. L., & Cox, M. M. (2005). Lehninger Principles of Biochemistry. Macmillan. -const PKA_LEHNINGER: &[(AminoAcid, pKa)] = &[ - (AminoAcid::Arginine, pKa::from(2.17, 9.04, Some(12.48))), - (AminoAcid::Histidine, pKa::from(1.82, 9.17, Some(6.00))), - (AminoAcid::Lysine, pKa::from(2.18, 8.95, Some(10.53))), - (AminoAcid::AsparticAcid, pKa::from(1.88, 9.60, Some(3.65))), - (AminoAcid::GlutamicAcid, pKa::from(2.19, 9.67, Some(4.25))), - (AminoAcid::Tyrosine, pKa::from(2.20, 9.11, Some(10.07))), - (AminoAcid::Cysteine, pKa::from(1.96, 10.28, Some(8.18))), - (AminoAcid::Alanine, pKa::from(2.34, 9.69, None)), - (AminoAcid::Glycine, pKa::from(2.34, 9.60, None)), - (AminoAcid::Proline, pKa::from(1.99, 10.96, None)), - (AminoAcid::Serine, pKa::from(2.21, 9.15, None)), - (AminoAcid::Threonine, pKa::from(2.11, 9.62, None)), - (AminoAcid::Methionine, pKa::from(2.28, 9.21, None)), - (AminoAcid::Phenylalanine, pKa::from(1.83, 9.13, None)), - (AminoAcid::Tryptophan, pKa::from(2.38, 9.39, None)), - (AminoAcid::Valine, pKa::from(2.32, 9.62, None)), - (AminoAcid::Isoleucine, pKa::from(2.36, 9.68, None)), - (AminoAcid::Leucine, pKa::from(2.36, 9.60, None)), - (AminoAcid::Glutamine, pKa::from(2.17, 9.13, None)), - (AminoAcid::Asparagine, pKa::from(2.02, 8.80, None)), - (AminoAcid::AmbiguousAsparagine, pKa::from(2.02, 8.80, None)), - (AminoAcid::AmbiguousGlutamine, pKa::from(2.17, 9.13, None)), - // (AminoAcid::Pyrrolysine, todo!()), - // (AminoAcid::Unknown, todo!()), -]; +impl pKaSource for pKaLehninger { + fn pKa(amino_acid: AminoAcid, modifications: &[SimpleModification]) -> Option { + if !modifications.is_empty() { + return None; + } + match amino_acid { + AminoAcid::Arginine => Some(AminoAcidpKa::new(9.04, Some(12.48), 2.17)), + AminoAcid::Histidine => 
Some(AminoAcidpKa::new(9.17, Some(6.00), 1.82)), + AminoAcid::Lysine => Some(AminoAcidpKa::new(8.95, Some(10.53), 2.18)), + AminoAcid::AsparticAcid => Some(AminoAcidpKa::new(9.60, Some(3.65), 1.88)), + AminoAcid::GlutamicAcid => Some(AminoAcidpKa::new(9.67, Some(4.25), 2.19)), + AminoAcid::Tyrosine => Some(AminoAcidpKa::new(9.11, Some(10.07), 2.20)), + AminoAcid::Cysteine => Some(AminoAcidpKa::new(10.28, Some(8.18), 1.96)), + AminoAcid::Alanine => Some(AminoAcidpKa::new(9.69, None, 2.34)), + AminoAcid::Glycine => Some(AminoAcidpKa::new(9.60, None, 2.34)), + AminoAcid::Proline => Some(AminoAcidpKa::new(10.96, None, 1.99)), + AminoAcid::Serine => Some(AminoAcidpKa::new(9.15, None, 2.21)), + AminoAcid::Threonine => Some(AminoAcidpKa::new(9.62, None, 2.11)), + AminoAcid::Methionine => Some(AminoAcidpKa::new(9.21, None, 2.28)), + AminoAcid::Phenylalanine => Some(AminoAcidpKa::new(9.13, None, 1.83)), + AminoAcid::Tryptophan => Some(AminoAcidpKa::new(9.39, None, 2.38)), + AminoAcid::Valine => Some(AminoAcidpKa::new(9.62, None, 2.32)), + AminoAcid::Isoleucine => Some(AminoAcidpKa::new(9.68, None, 2.36)), + AminoAcid::Leucine => Some(AminoAcidpKa::new(9.60, None, 2.36)), + AminoAcid::Glutamine => Some(AminoAcidpKa::new(9.13, None, 2.17)), + AminoAcid::Asparagine => Some(AminoAcidpKa::new(8.80, None, 2.02)), + AminoAcid::AmbiguousAsparagine => Some(AminoAcidpKa::new(8.80, None, 2.02)), + AminoAcid::AmbiguousGlutamine => Some(AminoAcidpKa::new(9.13, None, 2.17)), + _ => None, + } + } +} From 2168f1971454ac28cff8d372f82e66d401d4ac7c Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Mon, 10 Feb 2025 12:13:36 +0100 Subject: [PATCH 3/7] Used trait IsAminoAcid --- rustyms-generate-imgt/src/structs.rs | 14 +- rustyms-py/src/lib.rs | 6 +- rustyms/src/align/multi_alignment.rs | 2 +- rustyms/src/aminoacids.rs | 491 ++++++++++++++++----------- rustyms/src/checked_aminoacid.rs | 51 ++- rustyms/src/fragment.rs | 8 +- rustyms/src/lib.rs | 2 +- rustyms/src/sequence_element.rs | 2 +- 8 
files changed, 353 insertions(+), 223 deletions(-) diff --git a/rustyms-generate-imgt/src/structs.rs b/rustyms-generate-imgt/src/structs.rs index 3f1bb907..f6901152 100644 --- a/rustyms-generate-imgt/src/structs.rs +++ b/rustyms-generate-imgt/src/structs.rs @@ -4,7 +4,7 @@ use std::str::FromStr; use crate::imgt_gene::IMGTGene; use crate::shared::{AnnotatedSequence, Gene, Species}; -use rustyms::AminoAcid; +use rustyms::{AminoAcid, IsAminoAcid}; #[derive(Debug)] pub struct DataItem { @@ -57,7 +57,12 @@ impl Display for Region { // self.found_seq.0, self.found_seq .as_ref() - .map(|seq| seq.1 .0.iter().map(|a| a.char()).collect::()) + .map(|seq| seq + .1 + .0 + .iter() + .map(|a| a.pro_forma_definition()) + .collect::()) .unwrap_or_else(|e| format!(": {e}")), ) } @@ -237,7 +242,10 @@ impl std::fmt::Debug for AASequence { write!( f, "[{}]", - self.0.iter().map(|a| a.char()).collect::() + self.0 + .iter() + .map(|a| a.pro_forma_definition()) + .collect::() ) } } diff --git a/rustyms-py/src/lib.rs b/rustyms-py/src/lib.rs index 13c23e5f..3d0822c2 100644 --- a/rustyms-py/src/lib.rs +++ b/rustyms-py/src/lib.rs @@ -6,7 +6,7 @@ use std::num::NonZeroU16; use ordered_float::OrderedFloat; use pyo3::{exceptions::PyValueError, prelude::*, types::PyType}; -use rustyms::{AnnotatableSpectrum, Chemical, Linked, MultiChemical}; +use rustyms::{AnnotatableSpectrum, Chemical, IsAminoAcid, Linked, MultiChemical}; /// Mass mode enum. 
#[pyclass(eq, eq_int)] @@ -440,7 +440,7 @@ impl AminoAcid { } fn __str__(&self) -> String { - self.0.char().to_string() + self.0.pro_forma_definition().to_string() } fn __repr__(&self) -> String { @@ -1124,7 +1124,7 @@ impl Peptidoform { self.0 .sequence() .iter() - .map(|x| x.aminoacid.char()) + .map(|x| x.aminoacid.pro_forma_definition()) .collect() } diff --git a/rustyms/src/align/multi_alignment.rs b/rustyms/src/align/multi_alignment.rs index 24e72a8f..6359de3d 100644 --- a/rustyms/src/align/multi_alignment.rs +++ b/rustyms/src/align/multi_alignment.rs @@ -36,7 +36,7 @@ impl MultiAlignmentLine<'_, Complexity> { { print!( "{}{}", - piece.1.aminoacid.char(), + piece.1.aminoacid, "·".repeat(piece.0.step as usize - 1) ); } diff --git a/rustyms/src/aminoacids.rs b/rustyms/src/aminoacids.rs index 687ec960..c81ad472 100644 --- a/rustyms/src/aminoacids.rs +++ b/rustyms/src/aminoacids.rs @@ -12,7 +12,7 @@ use crate::{ use std::borrow::Cow; /// A general trait to define amino acids. -pub trait IsAminoAcid { +pub trait IsAminoAcid: MultiChemical { /// The full name for this amino acid. fn name(&self) -> Cow<'_, str>; /// The three letter code for this amino acid. Or None if there is no common three letter @@ -26,9 +26,6 @@ pub trait IsAminoAcid { /// defined as an amino acid with an additional modification. For example `X[H9C2N2]` could be /// used if Arginine was not defined as `R` in ProForma. fn pro_forma_definition(&self) -> Cow<'_, str>; - /// The full molecular formula for this amino acid. It allows multiple molecular formulas to - /// allow ambiguous amino acids such as B and Z. - fn formulas(&self) -> Cow<'_, Multi>; /// The monoisotopic mass of this amino acid. Should be redefined for better performance. 
fn monoisotopic_mass(&self) -> Cow<'_, Multi> { Cow::Owned( @@ -51,79 +48,224 @@ pub trait IsAminoAcid { fn mass(&self, mode: MassMode) -> Cow<'_, Multi> { Cow::Owned(self.formulas().iter().map(|f| f.mass(mode)).collect()) } - /// The molecular formula of the side chain of the amino acid. - fn side_chain(&self) -> Cow<'_, Multi>; + /// The molecular formula of the side chain of the amino acid. The `sequence_index` and + /// `peptidoform_index` are used to keep track of ambiguous amino acids. + fn side_chain( + &self, + sequence_index: SequencePosition, + peptidoform_index: usize, + ) -> Cow<'_, Multi>; /// The molecular formulas that can fragment for satellite ions (d and w). Commonly the fragment /// after the second carbon into the side chain. `MolecularFormula::default()` can be returned - /// if no satellite ions are possible. - fn satellite_ion_fragments(&self) -> Option>>; + /// if no satellite ions are possible. The `sequence_index` and `peptidoform_index` are used to + /// keep track of ambiguous amino acids. + fn satellite_ion_fragments( + &self, + sequence_index: SequencePosition, + peptidoform_index: usize, + ) -> Option>>; /// Common neutral losses for the immonium ion of this amino acid. 
fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]>; } +impl std::fmt::Display for dyn IsAminoAcid { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.pro_forma_definition()) + } +} + include!("shared/aminoacid.rs"); -impl AminoAcid { - /// All amino acids with a unique mass (no I/L in favour of J, no B, no Z, and no X) - pub const UNIQUE_MASS_AMINO_ACIDS: &'static [Self] = &[ - Self::Glycine, - Self::Alanine, - Self::Arginine, - Self::Asparagine, - Self::AsparticAcid, - Self::Cysteine, - Self::Glutamine, - Self::GlutamicAcid, - Self::Histidine, - Self::AmbiguousLeucine, - Self::Lysine, - Self::Methionine, - Self::Phenylalanine, - Self::Proline, - Self::Serine, - Self::Threonine, - Self::Tryptophan, - Self::Tyrosine, - Self::Valine, - Self::Selenocysteine, - Self::Pyrrolysine, - ]; +impl std::fmt::Display for AminoAcid { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.pro_forma_definition()) + } +} - /// All 20 canonical amino acids - pub const CANONICAL_AMINO_ACIDS: &'static [Self] = &[ - Self::Glycine, - Self::Alanine, - Self::Arginine, - Self::Asparagine, - Self::AsparticAcid, - Self::Cysteine, - Self::Glutamine, - Self::GlutamicAcid, - Self::Histidine, - Self::Leucine, - Self::Isoleucine, - Self::Lysine, - Self::Methionine, - Self::Phenylalanine, - Self::Proline, - Self::Serine, - Self::Threonine, - Self::Tryptophan, - Self::Tyrosine, - Self::Valine, - ]; +impl IsAminoAcid for AminoAcid { + /// Get the single letter representation of the amino acid + fn one_letter_code(&self) -> Option { + Some(match self { + Self::Alanine => 'A', + Self::AmbiguousAsparagine => 'B', + Self::Cysteine => 'C', + Self::AsparticAcid => 'D', + Self::GlutamicAcid => 'E', + Self::Phenylalanine => 'F', + Self::Glycine => 'G', + Self::Histidine => 'H', + Self::Isoleucine => 'I', + Self::AmbiguousLeucine => 'J', + Self::Lysine => 'K', + Self::Leucine => 'L', + Self::Methionine => 'M', + 
Self::Asparagine => 'N', + Self::Pyrrolysine => 'O', + Self::Proline => 'P', + Self::Glutamine => 'Q', + Self::Arginine => 'R', + Self::Serine => 'S', + Self::Threonine => 'T', + Self::Selenocysteine => 'U', + Self::Valine => 'V', + Self::Tryptophan => 'W', + Self::Unknown => 'X', + Self::Tyrosine => 'Y', + Self::AmbiguousGlutamine => 'Z', + }) + } + + fn pro_forma_definition(&self) -> Cow<'_, str> { + Cow::Borrowed(match self { + Self::Alanine => "A", + Self::AmbiguousAsparagine => "B", + Self::Cysteine => "C", + Self::AsparticAcid => "D", + Self::GlutamicAcid => "E", + Self::Phenylalanine => "F", + Self::Glycine => "G", + Self::Histidine => "H", + Self::Isoleucine => "I", + Self::AmbiguousLeucine => "J", + Self::Lysine => "K", + Self::Leucine => "L", + Self::Methionine => "M", + Self::Asparagine => "N", + Self::Pyrrolysine => "O", + Self::Proline => "P", + Self::Glutamine => "Q", + Self::Arginine => "R", + Self::Serine => "S", + Self::Threonine => "T", + Self::Selenocysteine => "U", + Self::Valine => "V", + Self::Tryptophan => "W", + Self::Unknown => "X", + Self::Tyrosine => "Y", + Self::AmbiguousGlutamine => "Z", + }) + } + + /// Get the 3 letter code for the amino acid + fn three_letter_code(&self) -> Option> { + Some(Cow::Borrowed(match self { + Self::Alanine => "Ala", + Self::AmbiguousAsparagine => "Asx", + Self::Cysteine => "Cys", + Self::AsparticAcid => "Asp", + Self::GlutamicAcid => "Glu", + Self::Phenylalanine => "Phe", + Self::Glycine => "Gly", + Self::Histidine => "His", + Self::Isoleucine => "Ile", + Self::AmbiguousLeucine => "Xle", + Self::Lysine => "Lys", + Self::Leucine => "Leu", + Self::Methionine => "Met", + Self::Asparagine => "Asn", + Self::Pyrrolysine => "Pyl", + Self::Proline => "Pro", + Self::Glutamine => "Gln", + Self::Arginine => "Arg", + Self::Serine => "Ser", + Self::Threonine => "Thr", + Self::Selenocysteine => "Sec", + Self::Valine => "Val", + Self::Tryptophan => "Trp", + Self::Unknown => "Xaa", + Self::Tyrosine => "Tyr", + 
Self::AmbiguousGlutamine => "Glx", + })) + } + + /// Get the full name for the amino acid + fn name(&self) -> Cow<'_, str> { + Cow::Borrowed(match self { + Self::Alanine => "Alanine", + Self::AmbiguousAsparagine => "AmbiguousAsparagine", + Self::Cysteine => "Cysteine", + Self::AsparticAcid => "AsparticAcid", + Self::GlutamicAcid => "GlutamicAcid", + Self::Phenylalanine => "Phenylalanine", + Self::Glycine => "Glycine", + Self::Histidine => "Histidine", + Self::Isoleucine => "Isoleucine", + Self::AmbiguousLeucine => "AmbiguousLeucine", + Self::Lysine => "Lysine", + Self::Leucine => "Leucine", + Self::Methionine => "Methionine", + Self::Asparagine => "Asparagine", + Self::Pyrrolysine => "Pyrrolysine", + Self::Proline => "Proline", + Self::Glutamine => "Glutamine", + Self::Arginine => "Arginine", + Self::Serine => "Serine", + Self::Threonine => "Threonine", + Self::Selenocysteine => "Selenocysteine", + Self::Valine => "Valine", + Self::Tryptophan => "Tryptophan", + Self::Unknown => "Unknown", + Self::Tyrosine => "Tyrosine", + Self::AmbiguousGlutamine => "AmbiguousGlutamine", + }) + } + + fn side_chain( + &self, + sequence_index: SequencePosition, + peptidoform_index: usize, + ) -> Cow<'_, Multi> { + let crate::SequencePosition::Index(sequence_index) = sequence_index else { + return Cow::Owned(Multi::default()); + }; + Cow::Owned(match self { + Self::Alanine => molecular_formula!(H 3 C 1).into(), + Self::Arginine => molecular_formula!(H 10 C 4 N 3).into(), // One of the H's counts as the charge carrier and is added later + Self::Asparagine => molecular_formula!(H 4 C 2 O 1 N 1).into(), + Self::AsparticAcid => molecular_formula!(H 3 C 2 O 2).into(), + Self::AmbiguousAsparagine => vec![ + molecular_formula!(H 4 C 2 O 1 N 1 (crate::AmbiguousLabel::AminoAcid{option: Self::Asparagine, sequence_index, peptidoform_index})), + molecular_formula!(H 3 C 2 O 2 (crate::AmbiguousLabel::AminoAcid{option: Self::AsparticAcid, sequence_index, peptidoform_index})), + ] + .into(), + 
Self::Cysteine => molecular_formula!(H 3 C 1 S 1).into(), + Self::Glutamine => molecular_formula!(H 6 C 3 O 1 N 1).into(), + Self::GlutamicAcid => molecular_formula!(H 5 C 3 O 2).into(), + Self::AmbiguousGlutamine => vec![ + molecular_formula!(H 6 C 3 O 1 N 1 (crate::AmbiguousLabel::AminoAcid{option: Self::Glutamine, sequence_index, peptidoform_index})), + molecular_formula!(H 5 C 3 O 2 (crate::AmbiguousLabel::AminoAcid{option: Self::GlutamicAcid, sequence_index, peptidoform_index})), + ] + .into(), + Self::Glycine => molecular_formula!(H 1).into(), + Self::Histidine => molecular_formula!(H 5 C 4 N 2).into(), + Self::AmbiguousLeucine | Self::Isoleucine | Self::Leucine => { + molecular_formula!(H 9 C 4).into() + } + Self::Lysine => molecular_formula!(H 10 C 4 N 1).into(), + Self::Methionine => molecular_formula!(H 7 C 3 S 1).into(), + Self::Phenylalanine => molecular_formula!(H 7 C 7).into(), + Self::Proline => molecular_formula!(H 5 C 3).into(), + Self::Pyrrolysine => molecular_formula!(H 17 C 9 O 1 N 2).into(), + Self::Selenocysteine => molecular_formula!(H 3 C 1 Se 1).into(), + Self::Serine => molecular_formula!(H 3 C 1 O 1).into(), + Self::Threonine => molecular_formula!(H 5 C 2 O 1).into(), + Self::Tryptophan => molecular_formula!(H 8 C 9 N 1).into(), + Self::Tyrosine => molecular_formula!(H 7 C 7 O 1).into(), + Self::Valine => molecular_formula!(H 7 C 3).into(), + Self::Unknown => molecular_formula!().into(), + }) + } // TODO: Take side chain mutations into account (maybe define pyrrolysine as a mutation) - /// # Panics - /// When the sequence index is terminal. 
- pub(crate) fn satellite_ion_fragments( - self, + fn satellite_ion_fragments( + &self, sequence_index: SequencePosition, peptidoform_index: usize, - ) -> Multi { + ) -> Option>> { let crate::SequencePosition::Index(sequence_index) = sequence_index else { - panic!("Not allowed to call satellite ion fragments with a terminal sequence index") + return None; }; + Some(Cow::Owned( match self { Self::Alanine | Self::Glycine @@ -172,7 +314,7 @@ impl AminoAcid { ] .into(), Self::Valine => molecular_formula!(H 3 C 1).into(), // Technically two options, but both have the same mass - } + })) } /// All losses from the base immonium ions. Compiled from the sources below. @@ -251,9 +393,9 @@ impl AminoAcid { /// | | 55 | | | 55 | | 55 | | | | | 55.0548 | | | | | | 4 | 55.0548 | | 17.0263 | | H3N1 | | H3N1 | /// | | 44 | | | | | | | | | | | | | | | | 1 | 44 | | 28.0811 | | C1H2N1 | | C1H2N1 | /// | | | | | 41 | | 41 | | | | | 41.0391 | | | | | | 3 | 41.0391 | | 31.0420 | | C1H5N1 | | C1H5N1 | - fn immonium_losses(self) -> Vec { + fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]> { // TODO: For B/Z there are common immonium ions, but the mass is the same (meaning the loss is different), find a way of representing that - match self { + Cow::Owned(match self { Self::Arginine => vec![ NeutralLoss::Gain(molecular_formula!(C 2 O 2)), NeutralLoss::Loss(molecular_formula!(C 1 H 2)), @@ -321,8 +463,59 @@ impl AminoAcid { NeutralLoss::Loss(molecular_formula!(C 1 H 5 N 1)), ], _ => Vec::new(), - } + }) } +} + +impl AminoAcid { + /// All amino acids with a unique mass (no I/L in favour of J, no B, no Z, and no X) + pub const UNIQUE_MASS_AMINO_ACIDS: &'static [Self] = &[ + Self::Glycine, + Self::Alanine, + Self::Arginine, + Self::Asparagine, + Self::AsparticAcid, + Self::Cysteine, + Self::Glutamine, + Self::GlutamicAcid, + Self::Histidine, + Self::AmbiguousLeucine, + Self::Lysine, + Self::Methionine, + Self::Phenylalanine, + Self::Proline, + Self::Serine, + Self::Threonine, + 
Self::Tryptophan, + Self::Tyrosine, + Self::Valine, + Self::Selenocysteine, + Self::Pyrrolysine, + ]; + + /// All 20 canonical amino acids + pub const CANONICAL_AMINO_ACIDS: &'static [Self] = &[ + Self::Glycine, + Self::Alanine, + Self::Arginine, + Self::Asparagine, + Self::AsparticAcid, + Self::Cysteine, + Self::Glutamine, + Self::GlutamicAcid, + Self::Histidine, + Self::Leucine, + Self::Isoleucine, + Self::Lysine, + Self::Methionine, + Self::Phenylalanine, + Self::Proline, + Self::Serine, + Self::Threonine, + Self::Tryptophan, + Self::Tyrosine, + Self::Valine, + ]; // TODO: generalise over used storage type, so using molecularformula, monoisotopic mass, or average mass, also make sure that AAs can return these numbers in a const fashion #[expect(clippy::too_many_lines, clippy::too_many_arguments)] @@ -383,19 +576,23 @@ impl AminoAcid { )); } if ions.d.0 && allow_terminal.0 { - base_fragments.extend(Fragment::generate_all( - &(-self.satellite_ion_fragments(sequence_index, peptidoform_index) - * modifications - * self.formulas_inner(sequence_index, peptidoform_index) - + molecular_formula!(H 1 C 1 O 1)), - peptidoform_ion_index, - peptidoform_index, - &FragmentType::d(n_pos), - n_term, - ions.d.1, - charge_carriers, - ions.d.2, - )); + if let Some(satellite_ion_fragments) = + self.satellite_ion_fragments(sequence_index, peptidoform_index) + { + base_fragments.extend(Fragment::generate_all( + &(-satellite_ion_fragments.as_ref() + * modifications + * self.formulas_inner(sequence_index, peptidoform_index) + + molecular_formula!(H 1 C 1 O 1)), + peptidoform_ion_index, + peptidoform_index, + &FragmentType::d(n_pos), + n_term, + ions.d.1, + charge_carriers, + ions.d.2, + )); + } } if ions.v.0 && allow_terminal.1 { base_fragments.extend(Fragment::generate_all( @@ -410,19 +607,23 @@ impl AminoAcid { )); } if ions.w.0 && allow_terminal.1 { - base_fragments.extend(Fragment::generate_all( - &(-self.satellite_ion_fragments(sequence_index, peptidoform_index) - * modifications - 
* self.formulas_inner(sequence_index, peptidoform_index) - + molecular_formula!(H 2 N 1)), - peptidoform_ion_index, - peptidoform_index, - &FragmentType::w(c_pos), - c_term, - ions.w.1, - charge_carriers, - ions.w.2, - )); + if let Some(satellite_ion_fragments) = + self.satellite_ion_fragments(sequence_index, peptidoform_index) + { + base_fragments.extend(Fragment::generate_all( + &(-satellite_ion_fragments.as_ref() + * modifications + * self.formulas_inner(sequence_index, peptidoform_index) + + molecular_formula!(H 2 N 1)), + peptidoform_ion_index, + peptidoform_index, + &FragmentType::w(c_pos), + c_term, + ions.w.1, + charge_carriers, + ions.w.2, + )); + } } if ions.x.0 && allow_terminal.1 { base_fragments.extend(Fragment::generate_all( @@ -483,7 +684,7 @@ impl AminoAcid { peptidoform_index, &FragmentType::Immonium(n_pos, self.into()), // TODO: get the actual sequenceelement here &Multi::default(), - self.immonium_losses().as_slice(), + self.immonium_losses().as_ref(), charge_carriers, ions.immonium.1, )); @@ -491,102 +692,6 @@ impl AminoAcid { base_fragments } - /// Get the single letter representation of the amino acid - pub const fn char(self) -> char { - match self { - Self::Alanine => 'A', - Self::AmbiguousAsparagine => 'B', - Self::Cysteine => 'C', - Self::AsparticAcid => 'D', - Self::GlutamicAcid => 'E', - Self::Phenylalanine => 'F', - Self::Glycine => 'G', - Self::Histidine => 'H', - Self::Isoleucine => 'I', - Self::AmbiguousLeucine => 'J', - Self::Lysine => 'K', - Self::Leucine => 'L', - Self::Methionine => 'M', - Self::Asparagine => 'N', - Self::Pyrrolysine => 'O', - Self::Proline => 'P', - Self::Glutamine => 'Q', - Self::Arginine => 'R', - Self::Serine => 'S', - Self::Threonine => 'T', - Self::Selenocysteine => 'U', - Self::Valine => 'V', - Self::Tryptophan => 'W', - Self::Unknown => 'X', - Self::Tyrosine => 'Y', - Self::AmbiguousGlutamine => 'Z', - } - } - - /// Get the 3 letter code for the amino acid - pub const fn code(self) -> &'static str { - 
match self { - Self::Alanine => "Ala", - Self::AmbiguousAsparagine => "Asx", - Self::Cysteine => "Cys", - Self::AsparticAcid => "Asp", - Self::GlutamicAcid => "Glu", - Self::Phenylalanine => "Phe", - Self::Glycine => "Gly", - Self::Histidine => "His", - Self::Isoleucine => "Ile", - Self::AmbiguousLeucine => "Xle", - Self::Lysine => "Lys", - Self::Leucine => "Leu", - Self::Methionine => "Met", - Self::Asparagine => "Asn", - Self::Pyrrolysine => "Pyl", - Self::Proline => "Pro", - Self::Glutamine => "Gln", - Self::Arginine => "Arg", - Self::Serine => "Ser", - Self::Threonine => "Thr", - Self::Selenocysteine => "Sec", - Self::Valine => "Val", - Self::Tryptophan => "Trp", - Self::Unknown => "Xaa", - Self::Tyrosine => "Tyr", - Self::AmbiguousGlutamine => "Glx", - } - } - - /// Get the full name for the amino acid - pub const fn name(self) -> &'static str { - match self { - Self::Alanine => "Alanine", - Self::AmbiguousAsparagine => "AmbiguousAsparagine", - Self::Cysteine => "Cysteine", - Self::AsparticAcid => "AsparticAcid", - Self::GlutamicAcid => "GlutamicAcid", - Self::Phenylalanine => "Phenylalanine", - Self::Glycine => "Glycine", - Self::Histidine => "Histidine", - Self::Isoleucine => "Isoleucine", - Self::AmbiguousLeucine => "AmbiguousLeucine", - Self::Lysine => "Lysine", - Self::Leucine => "Leucine", - Self::Methionine => "Methionine", - Self::Asparagine => "Asparagine", - Self::Pyrrolysine => "Pyrrolysine", - Self::Proline => "Proline", - Self::Glutamine => "Glutamine", - Self::Arginine => "Arginine", - Self::Serine => "Serine", - Self::Threonine => "Threonine", - Self::Selenocysteine => "Selenocysteine", - Self::Valine => "Valine", - Self::Tryptophan => "Tryptophan", - Self::Unknown => "Unknown", - Self::Tyrosine => "Tyrosine", - Self::AmbiguousGlutamine => "AmbiguousGlutamine", - } - } - /// Check if two amino acids are considered identical. X is identical to anything, J to IL, B to ND, Z to EQ. 
pub(crate) fn canonical_identical(self, rhs: Self) -> bool { match (self, rhs) { @@ -604,12 +709,6 @@ impl AminoAcid { } } -impl std::fmt::Display for AminoAcid { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.char()) - } -} - #[cfg(test)] #[expect(clippy::unreadable_literal, clippy::missing_panics_doc)] mod tests { @@ -666,7 +765,7 @@ mod tests { ); println!( "{}: {} {} {} {}", - aa.char(), + aa.pro_forma_definition(), mono, mono_mass, weight, diff --git a/rustyms/src/checked_aminoacid.rs b/rustyms/src/checked_aminoacid.rs index f709c828..5d8036d1 100644 --- a/rustyms/src/checked_aminoacid.rs +++ b/rustyms/src/checked_aminoacid.rs @@ -3,7 +3,8 @@ use std::marker::PhantomData; use serde::{Deserialize, Serialize}; use crate::{ - AminoAcid, Chemical, MolecularFormula, Multi, MultiChemical, SemiAmbiguous, UnAmbiguous, + aminoacids::IsAminoAcid, AminoAcid, Chemical, MolecularFormula, Multi, MultiChemical, + SemiAmbiguous, UnAmbiguous, }; /// A checked amino acid. 
This wraps an [`AminoAcid`] to keep track of the maximal complexity of @@ -279,24 +280,48 @@ impl CheckedAminoAcid { self.aminoacid.canonical_identical(rhs.aminoacid) } - /// Get the description of the amino acid as a single character - pub const fn char(self) -> char { - self.aminoacid.char() + /// Get the underlying (unchecked) amino acid + pub const fn aminoacid(self) -> AminoAcid { + self.aminoacid } +} - /// Get the 3 letter code for the amino acid - pub const fn code(self) -> &'static str { - self.aminoacid.code() +impl IsAminoAcid for CheckedAminoAcid { + fn name(&self) -> std::borrow::Cow<'_, str> { + self.aminoacid.name() } - /// Get the full name of the amino acid - pub const fn name(self) -> &'static str { - self.aminoacid.name() + fn three_letter_code(&self) -> Option> { + self.aminoacid.three_letter_code() } - /// Get the underlying (unchecked) amino acid - pub const fn aminoacid(self) -> AminoAcid { + fn one_letter_code(&self) -> Option { + self.aminoacid.one_letter_code() + } + + fn pro_forma_definition(&self) -> std::borrow::Cow<'_, str> { + self.aminoacid.pro_forma_definition() + } + + fn immonium_losses(&self) -> std::borrow::Cow<'_, [crate::NeutralLoss]> { + self.aminoacid.immonium_losses() + } + + fn satellite_ion_fragments( + &self, + sequence_index: crate::SequencePosition, + peptidoform_index: usize, + ) -> Option>> { self.aminoacid + .satellite_ion_fragments(sequence_index, peptidoform_index) + } + + fn side_chain( + &self, + sequence_index: crate::SequencePosition, + peptidoform_index: usize, + ) -> std::borrow::Cow<'_, Multi> { + self.aminoacid.side_chain(sequence_index, peptidoform_index) } } @@ -398,7 +423,7 @@ impl Default for CheckedAminoAcid { impl std::fmt::Display for CheckedAminoAcid { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.char()) + write!(f, "{}", self.pro_forma_definition()) } } diff --git a/rustyms/src/fragment.rs b/rustyms/src/fragment.rs index b93b362e..6e63bf65 100644 --- 
a/rustyms/src/fragment.rs +++ b/rustyms/src/fragment.rs @@ -481,9 +481,7 @@ impl FragmentType { Self::z·(_) => Cow::Borrowed("z·"), Self::B(_) => Cow::Borrowed("B"), Self::Y(_) | Self::YComposition(_, _) => Cow::Borrowed("Y"), - Self::Diagnostic(DiagnosticPosition::Peptide(_, aa)) => { - Cow::Owned(format!("d{}", aa.char())) - } + Self::Diagnostic(DiagnosticPosition::Peptide(_, aa)) => Cow::Owned(format!("d{aa}")), Self::Diagnostic(DiagnosticPosition::Reporter) => Cow::Borrowed("r"), Self::Diagnostic(DiagnosticPosition::Labile(m)) => Cow::Owned(format!("d{m}")), Self::Diagnostic( @@ -491,8 +489,8 @@ impl FragmentType { | DiagnosticPosition::GlycanCompositional(sug, _), ) => Cow::Owned(format!("d{sug}")), Self::Oxonium(_) | Self::OxoniumComposition(_, _) => Cow::Borrowed("oxonium"), - Self::Immonium(_, aa) => Cow::Owned(format!("i{}", aa.aminoacid.char())), - Self::PrecursorSideChainLoss(_, aa) => Cow::Owned(format!("p-s{}", aa.char())), + Self::Immonium(_, aa) => Cow::Owned(format!("i{}", aa.aminoacid)), + Self::PrecursorSideChainLoss(_, aa) => Cow::Owned(format!("p-s{aa}")), Self::Precursor => Cow::Borrowed("p"), Self::Internal(fragmentation, _, _) => Cow::Owned(format!( "m{}", diff --git a/rustyms/src/lib.rs b/rustyms/src/lib.rs index 96d410f9..ce3a09e5 100644 --- a/rustyms/src/lib.rs +++ b/rustyms/src/lib.rs @@ -85,7 +85,7 @@ pub use crate::sequence_element::SequenceElement; pub use crate::sequence_position::*; pub use crate::spectrum::{AnnotatableSpectrum, AnnotatedSpectrum, RawSpectrum}; pub use crate::tolerance::*; -pub use aminoacids::AminoAcid; +pub use aminoacids::{AminoAcid, IsAminoAcid}; pub use checked_aminoacid::CheckedAminoAcid; pub use fragment::Fragment; pub use peptidoform::{CompoundPeptidoformIon, Peptidoform, PeptidoformIon}; diff --git a/rustyms/src/sequence_element.rs b/rustyms/src/sequence_element.rs index e53ce32a..9c843973 100644 --- a/rustyms/src/sequence_element.rs +++ b/rustyms/src/sequence_element.rs @@ -117,7 +117,7 @@ impl 
SequenceElement { if self.ambiguous.is_some() && last_ambiguous != self.ambiguous { write!(f, "(?")?; } - write!(f, "{}", self.aminoacid.char())?; + write!(f, "{}", self.aminoacid)?; for m in &self.modifications { let mut display_ambiguous = false; if let Modification::Ambiguous { id, .. } = m { From 635b8cc5b2e34b60ed74c7895b9f703c26109bba Mon Sep 17 00:00:00 2001 From: Auke Heerdink <21688542+aukeheerdink@users.noreply.github.com> Date: Thu, 13 Feb 2025 11:57:41 +0100 Subject: [PATCH 4/7] Added pKa and Hydro. module --- rustyms/src/aminoacid_hydrophobicity.rs | 0 rustyms/src/aminoacid_pka.rs | 116 ++++++++++++++++++++++++ rustyms/src/aminoacid_properties.rs | 115 ----------------------- rustyms/src/lib.rs | 2 + 4 files changed, 118 insertions(+), 115 deletions(-) create mode 100644 rustyms/src/aminoacid_hydrophobicity.rs create mode 100644 rustyms/src/aminoacid_pka.rs diff --git a/rustyms/src/aminoacid_hydrophobicity.rs b/rustyms/src/aminoacid_hydrophobicity.rs new file mode 100644 index 00000000..e69de29b diff --git a/rustyms/src/aminoacid_pka.rs b/rustyms/src/aminoacid_pka.rs new file mode 100644 index 00000000..bb046b66 --- /dev/null +++ b/rustyms/src/aminoacid_pka.rs @@ -0,0 +1,116 @@ +use serde::{Deserialize, Serialize}; + +use crate::{ + aminoacids::IsAminoAcid, modification::SimpleModification, AminoAcid, Peptidoform, + SemiAmbiguous, +}; + +#[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Serialize, Deserialize)] +pub struct AminoAcidpKa { + ammonium: f64, + sidechain: Option, + carboxyl: f64, +} + +/// A source for pKa values, which can be used to calculate the pKa for peptidoforms. +pub trait pKaSource { + /// Get the pKa values for the given amino acid and modifications. + fn pKa(amino_acid: AA, modifications: &[SimpleModification]) -> Option; + + /// Get the calculated pKa value for the given peptidoform, or None if any of the sequence elements do not have a defined pKa. 
+ fn peptide_pKa(peptidoform: Peptidoform) -> Option { + todo!() + } +} + +/// The pKa values for an amino acid +impl AminoAcidpKa { + const fn new(ammonium: f64, sidechain: Option, carboxyl: f64) -> Self { + Self { + ammonium, + sidechain, + carboxyl, + } + } + pub const fn ammonium(self) -> f64 { + self.ammonium + } + pub const fn sidechain(self) -> Option { + self.sidechain + } + pub const fn carboxyl(self) -> f64 { + self.carboxyl + } +} + +/// pKa values from Lide, D. R. (1991). Handbook of Chemistry and Physics: A Ready Reference Book of Chemical and Physical Data. +pub struct pKaLide1991; + +impl pKaSource for pKaLide1991 { + fn pKa(amino_acid: AminoAcid, modifications: &[SimpleModification]) -> Option { + if !modifications.is_empty() { + return None; + } + match amino_acid { + AminoAcid::Arginine => Some(AminoAcidpKa::new(9.00, Some(12.10), 2.03)), + AminoAcid::Histidine => Some(AminoAcidpKa::new(9.09, Some(6.04), 1.70)), + AminoAcid::Lysine => Some(AminoAcidpKa::new(9.16, Some(10.67), 2.15)), + AminoAcid::AsparticAcid => Some(AminoAcidpKa::new(9.66, Some(3.71), 1.95)), + AminoAcid::GlutamicAcid => Some(AminoAcidpKa::new(9.58, Some(4.15), 2.16)), + AminoAcid::Tyrosine => Some(AminoAcidpKa::new(9.04, Some(10.10), 2.24)), + AminoAcid::Cysteine => Some(AminoAcidpKa::new(10.28, Some(8.14), 1.91)), + AminoAcid::Alanine => Some(AminoAcidpKa::new(9.71, None, 2.33)), + AminoAcid::Glycine => Some(AminoAcidpKa::new(9.58, None, 2.34)), + AminoAcid::Proline => Some(AminoAcidpKa::new(10.47, None, 1.95)), + AminoAcid::Serine => Some(AminoAcidpKa::new(9.05, None, 2.13)), + AminoAcid::Threonine => Some(AminoAcidpKa::new(8.96, None, 2.20)), + AminoAcid::Methionine => Some(AminoAcidpKa::new(9.08, None, 2.16)), + AminoAcid::Phenylalanine => Some(AminoAcidpKa::new(9.09, None, 2.18)), + AminoAcid::Tryptophan => Some(AminoAcidpKa::new(9.34, None, 2.38)), + AminoAcid::Valine => Some(AminoAcidpKa::new(9.52, None, 2.27)), + AminoAcid::Isoleucine => Some(AminoAcidpKa::new(9.60, 
None, 2.26)), + AminoAcid::Leucine => Some(AminoAcidpKa::new(9.58, None, 2.32)), + AminoAcid::Glutamine => Some(AminoAcidpKa::new(9.00, None, 2.18)), + AminoAcid::Asparagine => Some(AminoAcidpKa::new(8.73, None, 2.16)), + AminoAcid::AmbiguousAsparagine => Some(AminoAcidpKa::new(8.73, None, 2.16)), + AminoAcid::AmbiguousGlutamine => Some(AminoAcidpKa::new(9.00, None, 2.18)), + _ => None, + } + } +} + +/// pKa values from Lehninger, A. L., Nelson, D. L., & Cox, M. M. (2005). Lehninger Principles of Biochemistry. Macmillan. +pub struct pKaLehninger; + +impl pKaSource for pKaLehninger { + fn pKa(amino_acid: AminoAcid, modifications: &[SimpleModification]) -> Option { + if !modifications.is_empty() { + return None; + } + match amino_acid { + AminoAcid::Arginine => Some(AminoAcidpKa::new(9.04, Some(12.48), 2.17)), + AminoAcid::Histidine => Some(AminoAcidpKa::new(9.17, Some(6.00), 1.82)), + AminoAcid::Lysine => Some(AminoAcidpKa::new(8.95, Some(10.53), 2.18)), + AminoAcid::AsparticAcid => Some(AminoAcidpKa::new(9.60, Some(3.65), 1.88)), + AminoAcid::GlutamicAcid => Some(AminoAcidpKa::new(9.67, Some(4.25), 2.19)), + AminoAcid::Tyrosine => Some(AminoAcidpKa::new(9.11, Some(10.07), 2.20)), + AminoAcid::Cysteine => Some(AminoAcidpKa::new(10.28, Some(8.18), 1.96)), + AminoAcid::Alanine => Some(AminoAcidpKa::new(9.69, None, 2.34)), + AminoAcid::Glycine => Some(AminoAcidpKa::new(9.60, None, 2.34)), + AminoAcid::Proline => Some(AminoAcidpKa::new(10.96, None, 1.99)), + AminoAcid::Serine => Some(AminoAcidpKa::new(9.15, None, 2.21)), + AminoAcid::Threonine => Some(AminoAcidpKa::new(9.62, None, 2.11)), + AminoAcid::Methionine => Some(AminoAcidpKa::new(9.21, None, 2.28)), + AminoAcid::Phenylalanine => Some(AminoAcidpKa::new(9.13, None, 1.83)), + AminoAcid::Tryptophan => Some(AminoAcidpKa::new(9.39, None, 2.38)), + AminoAcid::Valine => Some(AminoAcidpKa::new(9.62, None, 2.32)), + AminoAcid::Isoleucine => Some(AminoAcidpKa::new(9.68, None, 2.36)), + AminoAcid::Leucine => 
Some(AminoAcidpKa::new(9.60, None, 2.36)), + AminoAcid::Glutamine => Some(AminoAcidpKa::new(9.13, None, 2.17)), + AminoAcid::Asparagine => Some(AminoAcidpKa::new(8.80, None, 2.02)), + AminoAcid::AmbiguousAsparagine => Some(AminoAcidpKa::new(8.80, None, 2.02)), + AminoAcid::AmbiguousGlutamine => Some(AminoAcidpKa::new(9.13, None, 2.17)), + _ => None, + } + } +} diff --git a/rustyms/src/aminoacid_properties.rs b/rustyms/src/aminoacid_properties.rs index c7f05bba..98d8ad74 100644 --- a/rustyms/src/aminoacid_properties.rs +++ b/rustyms/src/aminoacid_properties.rs @@ -10,11 +10,6 @@ use serde::{Deserialize, Serialize}; -use crate::{ - aminoacids::IsAminoAcid, modification::SimpleModification, AminoAcid, Peptidoform, - SemiAmbiguous, -}; - /// All amino acid property classes according to IMGT. /// > IMGT standardized criteria for statistical analysis of immunoglobulin V-REGION amino acid properties /// > @@ -282,113 +277,3 @@ pub enum HydrogenBondClass { None, Unknown, } - -#[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Serialize, Deserialize)] -pub struct AminoAcidpKa { - ammonium: f32, - sidechain: Option, - carboxyl: f32, -} - -/// A source for pKa values, which can be used to calculate the pKa for peptidoforms. -pub trait pKaSource { - /// Get the pKa values for the given amino acid and modifications. - fn pKa(amino_acid: AA, modifications: &[SimpleModification]) -> Option; - - /// Get the calculated pKa value for the given peptidoform, or None if any of the sequence elements do not have a defined pKa. 
- fn peptide_pKa(peptidoform: Peptidoform) -> Option { - todo!() - } -} - -/// The pKa values for an amino acid -impl AminoAcidpKa { - const fn new(ammonium: f32, sidechain: Option, carboxyl: f32) -> Self { - Self { - ammonium, - sidechain, - carboxyl, - } - } - pub const fn ammonium(self) -> f32 { - self.ammonium - } - pub const fn sidechain(self) -> Option { - self.sidechain - } - pub const fn carboxyl(self) -> f32 { - self.carboxyl - } -} - -/// pKa values from Lide, D. R. (1991). Handbook of Chemistry and Physics: A Ready Reference Book of Chemical and Physical Data. -pub struct pKaLide1991; - -impl pKaSource for pKaLide1991 { - fn pKa(amino_acid: AminoAcid, modifications: &[SimpleModification]) -> Option { - if !modifications.is_empty() { - return None; - } - match amino_acid { - AminoAcid::Arginine => Some(AminoAcidpKa::new(9.00, Some(12.10), 2.03)), - AminoAcid::Histidine => Some(AminoAcidpKa::new(9.09, Some(6.04), 1.70)), - AminoAcid::Lysine => Some(AminoAcidpKa::new(9.16, Some(10.67), 2.15)), - AminoAcid::AsparticAcid => Some(AminoAcidpKa::new(9.66, Some(3.71), 1.95)), - AminoAcid::GlutamicAcid => Some(AminoAcidpKa::new(9.58, Some(4.15), 2.16)), - AminoAcid::Tyrosine => Some(AminoAcidpKa::new(9.04, Some(10.10), 2.24)), - AminoAcid::Cysteine => Some(AminoAcidpKa::new(10.28, Some(8.14), 1.91)), - AminoAcid::Alanine => Some(AminoAcidpKa::new(9.71, None, 2.33)), - AminoAcid::Glycine => Some(AminoAcidpKa::new(9.58, None, 2.34)), - AminoAcid::Proline => Some(AminoAcidpKa::new(10.47, None, 1.95)), - AminoAcid::Serine => Some(AminoAcidpKa::new(9.05, None, 2.13)), - AminoAcid::Threonine => Some(AminoAcidpKa::new(8.96, None, 2.20)), - AminoAcid::Methionine => Some(AminoAcidpKa::new(9.08, None, 2.16)), - AminoAcid::Phenylalanine => Some(AminoAcidpKa::new(9.09, None, 2.18)), - AminoAcid::Tryptophan => Some(AminoAcidpKa::new(9.34, None, 2.38)), - AminoAcid::Valine => Some(AminoAcidpKa::new(9.52, None, 2.27)), - AminoAcid::Isoleucine => Some(AminoAcidpKa::new(9.60, 
None, 2.26)), - AminoAcid::Leucine => Some(AminoAcidpKa::new(9.58, None, 2.32)), - AminoAcid::Glutamine => Some(AminoAcidpKa::new(9.00, None, 2.18)), - AminoAcid::Asparagine => Some(AminoAcidpKa::new(8.73, None, 2.16)), - AminoAcid::AmbiguousAsparagine => Some(AminoAcidpKa::new(8.73, None, 2.16)), - AminoAcid::AmbiguousGlutamine => Some(AminoAcidpKa::new(9.00, None, 2.18)), - _ => None, - } - } -} - -/// pKa values from Lehninger, A. L., Nelson, D. L., & Cox, M. M. (2005). Lehninger Principles of Biochemistry. Macmillan. -pub struct pKaLehninger; - -impl pKaSource for pKaLehninger { - fn pKa(amino_acid: AminoAcid, modifications: &[SimpleModification]) -> Option { - if !modifications.is_empty() { - return None; - } - match amino_acid { - AminoAcid::Arginine => Some(AminoAcidpKa::new(9.04, Some(12.48), 2.17)), - AminoAcid::Histidine => Some(AminoAcidpKa::new(9.17, Some(6.00), 1.82)), - AminoAcid::Lysine => Some(AminoAcidpKa::new(8.95, Some(10.53), 2.18)), - AminoAcid::AsparticAcid => Some(AminoAcidpKa::new(9.60, Some(3.65), 1.88)), - AminoAcid::GlutamicAcid => Some(AminoAcidpKa::new(9.67, Some(4.25), 2.19)), - AminoAcid::Tyrosine => Some(AminoAcidpKa::new(9.11, Some(10.07), 2.20)), - AminoAcid::Cysteine => Some(AminoAcidpKa::new(10.28, Some(8.18), 1.96)), - AminoAcid::Alanine => Some(AminoAcidpKa::new(9.69, None, 2.34)), - AminoAcid::Glycine => Some(AminoAcidpKa::new(9.60, None, 2.34)), - AminoAcid::Proline => Some(AminoAcidpKa::new(10.96, None, 1.99)), - AminoAcid::Serine => Some(AminoAcidpKa::new(9.15, None, 2.21)), - AminoAcid::Threonine => Some(AminoAcidpKa::new(9.62, None, 2.11)), - AminoAcid::Methionine => Some(AminoAcidpKa::new(9.21, None, 2.28)), - AminoAcid::Phenylalanine => Some(AminoAcidpKa::new(9.13, None, 1.83)), - AminoAcid::Tryptophan => Some(AminoAcidpKa::new(9.39, None, 2.38)), - AminoAcid::Valine => Some(AminoAcidpKa::new(9.62, None, 2.32)), - AminoAcid::Isoleucine => Some(AminoAcidpKa::new(9.68, None, 2.36)), - AminoAcid::Leucine => 
Some(AminoAcidpKa::new(9.60, None, 2.36)), - AminoAcid::Glutamine => Some(AminoAcidpKa::new(9.13, None, 2.17)), - AminoAcid::Asparagine => Some(AminoAcidpKa::new(8.80, None, 2.02)), - AminoAcid::AmbiguousAsparagine => Some(AminoAcidpKa::new(8.80, None, 2.02)), - AminoAcid::AmbiguousGlutamine => Some(AminoAcidpKa::new(9.13, None, 2.17)), - _ => None, - } - } -} diff --git a/rustyms/src/lib.rs b/rustyms/src/lib.rs index ce3a09e5..39bf7268 100644 --- a/rustyms/src/lib.rs +++ b/rustyms/src/lib.rs @@ -34,6 +34,8 @@ mod formula; #[path = "shared/csv.rs"] pub mod csv; +pub mod aminoacid_hydrophobicity; +pub mod aminoacid_pka; pub mod aminoacid_properties; mod aminoacids; mod checked_aminoacid; From 15a7ca72d6c376ab9fd312d33b620eaebd699b38 Mon Sep 17 00:00:00 2001 From: Auke Heerdink <21688542+aukeheerdink@users.noreply.github.com> Date: Fri, 28 Feb 2025 18:34:28 +0100 Subject: [PATCH 5/7] Added isoelectric point calculation --- Cargo.toml | 2 +- rustyms/src/aminoacid_pka.rs | 261 +++++++++++++----- .../src/identification/identified_peptide.rs | 9 +- 3 files changed, 192 insertions(+), 80 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2b0e5225..2d132bed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ directories = "6.0" flate2 = "1.0" iai-callgrind = "0.14" itertools = "0.14" -mzdata = "0.44" +mzdata = "0.46" ndarray = "0.16" ordered-float = { version = "4.6", features = ["serde"] } probability = "0.20" diff --git a/rustyms/src/aminoacid_pka.rs b/rustyms/src/aminoacid_pka.rs index bb046b66..5ece62c1 100644 --- a/rustyms/src/aminoacid_pka.rs +++ b/rustyms/src/aminoacid_pka.rs @@ -1,115 +1,228 @@ use serde::{Deserialize, Serialize}; use crate::{ - aminoacids::IsAminoAcid, modification::SimpleModification, AminoAcid, Peptidoform, - SemiAmbiguous, + aminoacid_properties::ChargeClass, aminoacids::IsAminoAcid, + modification::SimpleModificationInner, AminoAcid, AtMax, Peptidoform, SemiAmbiguous, }; -#[derive(Copy, Clone, PartialEq, PartialOrd, Debug, 
Serialize, Deserialize)] -pub struct AminoAcidpKa { - ammonium: f64, - sidechain: Option, - carboxyl: f64, -} - /// A source for pKa values, which can be used to calculate the pKa for peptidoforms. -pub trait pKaSource { +pub trait PKaSource { /// Get the pKa values for the given amino acid and modifications. - fn pKa(amino_acid: AA, modifications: &[SimpleModification]) -> Option; + #[allow(non_snake_case)] + fn pKa( + amino_acid: AA, + side_chain_modifications: impl Iterator>, + n_terminal_modifications: Option>>, + c_terminal_modifications: Option>>, + ) -> Option; +} +impl> Peptidoform { /// Get the calculated pKa value for the given peptidoform, or None if any of the sequence elements do not have a defined pKa. - fn peptide_pKa(peptidoform: Peptidoform) -> Option { - todo!() + #[allow(non_snake_case)] + pub fn isoelectic_point>(&self) -> Option { + let sequence = self.sequence(); + if sequence.is_empty() { + return None; + } + + // Collect all ionizable groups with their pKa values + let mut ionizable = Vec::with_capacity(sequence.len() + 2); + + // N-terminal + let first = sequence.first().unwrap(); + ionizable.push(( + ChargeClass::Positive, + Source::pKa( + first.aminoacid.aminoacid(), + first.modifications.iter().filter_map(|m| m.simple()), + Some(self.get_n_term().iter().filter_map(|m| m.simple())), + (self.len() == 1).then_some(self.get_c_term().iter().filter_map(|m| m.simple())), + )? + .n_term(), + )); // N-terminal is always positive + + // C-terminal + let last = sequence.last().unwrap(); + ionizable.push(( + ChargeClass::Negative, + Source::pKa( + last.aminoacid.aminoacid(), + last.modifications.iter().filter_map(|m| m.simple()), + (self.len() == 1).then_some(self.get_n_term().iter().filter_map(|m| m.simple())), + Some(self.get_c_term().iter().filter_map(|m| m.simple())), + )? 
+ .c_term(), + )); // C-terminal is always negative + + // Handle sidechains + for (index, aa) in sequence.iter().enumerate() { + if let Some(sidechain) = Source::pKa( + aa.aminoacid.aminoacid(), + aa.modifications.iter().filter_map(|m| m.simple()), + (index == 0).then_some(self.get_n_term().iter().filter_map(|m| m.simple())), + (index == self.len() - 1) + .then_some(self.get_c_term().iter().filter_map(|m| m.simple())), + )? + .sidechain() + { + let charge_class = aa.aminoacid.aminoacid().charge_class(); + match charge_class { + ChargeClass::Positive | ChargeClass::Negative => { + ionizable.push((charge_class, sidechain)); + } + ChargeClass::Unknown => return None, + ChargeClass::Uncharged => (), + } + } + } + + // Binary search between pH 0-14 to find isoelectric point + let mut low = 0.0; + let mut high = 14.0; + let mut new_pi = 7.775; + const EPSILON: f64 = 0.0001; + + while (high - low) > EPSILON { + new_pi = (low + high) / 2.0; + let charge = calculate_charge(new_pi, &ionizable); + + if charge > 0.0 { + low = new_pi; + } else { + high = new_pi; + } + } + + Some((new_pi * 100.0).round() / 100.0) + } +} + +fn calculate_charge(pH: f64, ionizable: &[(ChargeClass, f64)]) -> f64 { + let mut charge = 0.0; + + for (class, pka) in ionizable { + match class { + ChargeClass::Positive => charge += 1.0 / (10.0_f64.powf(pH - pka) + 1.0), + ChargeClass::Negative => charge -= 1.0 / (10.0_f64.powf(pka - pH) + 1.0), + _ => {} + } } + + charge +} +/// The pKa for a specific Amino Acid +#[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Serialize, Deserialize)] +pub struct AminoAcidPKa { + n_term: f64, + sidechain: Option, + c_term: f64, } -/// The pKa values for an amino acid -impl AminoAcidpKa { - const fn new(ammonium: f64, sidechain: Option, carboxyl: f64) -> Self { +impl AminoAcidPKa { + const fn new(n_term: f64, sidechain: Option, c_term: f64) -> Self { Self { - ammonium, + n_term, sidechain, - carboxyl, + c_term, } } - pub const fn ammonium(self) -> f64 { - self.ammonium 
+ + /// Get the pKa value for the n-term of the Amino acid + pub const fn n_term(self) -> f64 { + self.n_term } + + /// Get the pKa value for the side-chain group of the Amino acid pub const fn sidechain(self) -> Option { self.sidechain } - pub const fn carboxyl(self) -> f64 { - self.carboxyl + + /// Get the pKa value for the c-term of the Amino acid + pub const fn c_term(self) -> f64 { + self.c_term } } /// pKa values from Lide, D. R. (1991). Handbook of Chemistry and Physics: A Ready Reference Book of Chemical and Physical Data. -pub struct pKaLide1991; +pub struct PKaLide1991; -impl pKaSource for pKaLide1991 { - fn pKa(amino_acid: AminoAcid, modifications: &[SimpleModification]) -> Option { - if !modifications.is_empty() { +impl PKaSource for PKaLide1991 { + fn pKa( + amino_acid: AminoAcid, + mut side_chain_modifications: impl Iterator>, + n_terminal_modifications: Option>>, + c_terminal_modifications: Option>>, + ) -> Option { + if side_chain_modifications.next().is_some() + || n_terminal_modifications.is_some_and(|mut m| m.next().is_some()) + || c_terminal_modifications.is_some_and(|mut m| m.next().is_some()) + { return None; } match amino_acid { - AminoAcid::Arginine => Some(AminoAcidpKa::new(9.00, Some(12.10), 2.03)), - AminoAcid::Histidine => Some(AminoAcidpKa::new(9.09, Some(6.04), 1.70)), - AminoAcid::Lysine => Some(AminoAcidpKa::new(9.16, Some(10.67), 2.15)), - AminoAcid::AsparticAcid => Some(AminoAcidpKa::new(9.66, Some(3.71), 1.95)), - AminoAcid::GlutamicAcid => Some(AminoAcidpKa::new(9.58, Some(4.15), 2.16)), - AminoAcid::Tyrosine => Some(AminoAcidpKa::new(9.04, Some(10.10), 2.24)), - AminoAcid::Cysteine => Some(AminoAcidpKa::new(10.28, Some(8.14), 1.91)), - AminoAcid::Alanine => Some(AminoAcidpKa::new(9.71, None, 2.33)), - AminoAcid::Glycine => Some(AminoAcidpKa::new(9.58, None, 2.34)), - AminoAcid::Proline => Some(AminoAcidpKa::new(10.47, None, 1.95)), - AminoAcid::Serine => Some(AminoAcidpKa::new(9.05, None, 2.13)), - AminoAcid::Threonine => 
Some(AminoAcidpKa::new(8.96, None, 2.20)), - AminoAcid::Methionine => Some(AminoAcidpKa::new(9.08, None, 2.16)), - AminoAcid::Phenylalanine => Some(AminoAcidpKa::new(9.09, None, 2.18)), - AminoAcid::Tryptophan => Some(AminoAcidpKa::new(9.34, None, 2.38)), - AminoAcid::Valine => Some(AminoAcidpKa::new(9.52, None, 2.27)), - AminoAcid::Isoleucine => Some(AminoAcidpKa::new(9.60, None, 2.26)), - AminoAcid::Leucine => Some(AminoAcidpKa::new(9.58, None, 2.32)), - AminoAcid::Glutamine => Some(AminoAcidpKa::new(9.00, None, 2.18)), - AminoAcid::Asparagine => Some(AminoAcidpKa::new(8.73, None, 2.16)), - AminoAcid::AmbiguousAsparagine => Some(AminoAcidpKa::new(8.73, None, 2.16)), - AminoAcid::AmbiguousGlutamine => Some(AminoAcidpKa::new(9.00, None, 2.18)), + AminoAcid::Arginine => Some(AminoAcidPKa::new(9.00, Some(12.10), 2.03)), + AminoAcid::Histidine => Some(AminoAcidPKa::new(9.09, Some(6.04), 1.70)), + AminoAcid::Lysine => Some(AminoAcidPKa::new(9.16, Some(10.67), 2.15)), + AminoAcid::AsparticAcid => Some(AminoAcidPKa::new(9.66, Some(3.71), 1.95)), + AminoAcid::GlutamicAcid => Some(AminoAcidPKa::new(9.58, Some(4.15), 2.16)), + AminoAcid::Tyrosine => Some(AminoAcidPKa::new(9.04, Some(10.10), 2.24)), + AminoAcid::Cysteine => Some(AminoAcidPKa::new(10.28, Some(8.14), 1.91)), + AminoAcid::Alanine => Some(AminoAcidPKa::new(9.71, None, 2.33)), + AminoAcid::Glycine => Some(AminoAcidPKa::new(9.58, None, 2.34)), + AminoAcid::Proline => Some(AminoAcidPKa::new(10.47, None, 1.95)), + AminoAcid::Serine => Some(AminoAcidPKa::new(9.05, None, 2.13)), + AminoAcid::Threonine => Some(AminoAcidPKa::new(8.96, None, 2.20)), + AminoAcid::Methionine => Some(AminoAcidPKa::new(9.08, None, 2.16)), + AminoAcid::Phenylalanine => Some(AminoAcidPKa::new(9.09, None, 2.18)), + AminoAcid::Tryptophan => Some(AminoAcidPKa::new(9.34, None, 2.38)), + AminoAcid::Valine => Some(AminoAcidPKa::new(9.52, None, 2.27)), + AminoAcid::Isoleucine => Some(AminoAcidPKa::new(9.60, None, 2.26)), + AminoAcid::Leucine => 
Some(AminoAcidPKa::new(9.58, None, 2.32)), + AminoAcid::Glutamine => Some(AminoAcidPKa::new(9.00, None, 2.18)), + AminoAcid::Asparagine => Some(AminoAcidPKa::new(8.73, None, 2.16)), _ => None, } } } /// pKa values from Lehninger, A. L., Nelson, D. L., & Cox, M. M. (2005). Lehninger Principles of Biochemistry. Macmillan. -pub struct pKaLehninger; +pub struct PKaLehninger; -impl pKaSource for pKaLehninger { - fn pKa(amino_acid: AminoAcid, modifications: &[SimpleModification]) -> Option { - if !modifications.is_empty() { +impl PKaSource for PKaLehninger { + fn pKa( + amino_acid: AminoAcid, + mut side_chain_modifications: impl Iterator>, + n_terminal_modifications: Option>>, + c_terminal_modifications: Option>>, + ) -> Option { + if side_chain_modifications.next().is_some() + || n_terminal_modifications.is_some_and(|mut m| m.next().is_some()) + || c_terminal_modifications.is_some_and(|mut m| m.next().is_some()) + { return None; } match amino_acid { - AminoAcid::Arginine => Some(AminoAcidpKa::new(9.04, Some(12.48), 2.17)), - AminoAcid::Histidine => Some(AminoAcidpKa::new(9.17, Some(6.00), 1.82)), - AminoAcid::Lysine => Some(AminoAcidpKa::new(8.95, Some(10.53), 2.18)), - AminoAcid::AsparticAcid => Some(AminoAcidpKa::new(9.60, Some(3.65), 1.88)), - AminoAcid::GlutamicAcid => Some(AminoAcidpKa::new(9.67, Some(4.25), 2.19)), - AminoAcid::Tyrosine => Some(AminoAcidpKa::new(9.11, Some(10.07), 2.20)), - AminoAcid::Cysteine => Some(AminoAcidpKa::new(10.28, Some(8.18), 1.96)), - AminoAcid::Alanine => Some(AminoAcidpKa::new(9.69, None, 2.34)), - AminoAcid::Glycine => Some(AminoAcidpKa::new(9.60, None, 2.34)), - AminoAcid::Proline => Some(AminoAcidpKa::new(10.96, None, 1.99)), - AminoAcid::Serine => Some(AminoAcidpKa::new(9.15, None, 2.21)), - AminoAcid::Threonine => Some(AminoAcidpKa::new(9.62, None, 2.11)), - AminoAcid::Methionine => Some(AminoAcidpKa::new(9.21, None, 2.28)), - AminoAcid::Phenylalanine => Some(AminoAcidpKa::new(9.13, None, 1.83)), - AminoAcid::Tryptophan => 
Some(AminoAcidpKa::new(9.39, None, 2.38)), - AminoAcid::Valine => Some(AminoAcidpKa::new(9.62, None, 2.32)), - AminoAcid::Isoleucine => Some(AminoAcidpKa::new(9.68, None, 2.36)), - AminoAcid::Leucine => Some(AminoAcidpKa::new(9.60, None, 2.36)), - AminoAcid::Glutamine => Some(AminoAcidpKa::new(9.13, None, 2.17)), - AminoAcid::Asparagine => Some(AminoAcidpKa::new(8.80, None, 2.02)), - AminoAcid::AmbiguousAsparagine => Some(AminoAcidpKa::new(8.80, None, 2.02)), - AminoAcid::AmbiguousGlutamine => Some(AminoAcidpKa::new(9.13, None, 2.17)), + AminoAcid::Arginine => Some(AminoAcidPKa::new(9.04, Some(12.48), 2.17)), + AminoAcid::Histidine => Some(AminoAcidPKa::new(9.17, Some(6.00), 1.82)), + AminoAcid::Lysine => Some(AminoAcidPKa::new(8.95, Some(10.53), 2.18)), + AminoAcid::AsparticAcid => Some(AminoAcidPKa::new(9.60, Some(3.65), 1.88)), + AminoAcid::GlutamicAcid => Some(AminoAcidPKa::new(9.67, Some(4.25), 2.19)), + AminoAcid::Tyrosine => Some(AminoAcidPKa::new(9.11, Some(10.07), 2.20)), + AminoAcid::Cysteine => Some(AminoAcidPKa::new(10.28, Some(8.18), 1.96)), + AminoAcid::Alanine => Some(AminoAcidPKa::new(9.69, None, 2.34)), + AminoAcid::Glycine => Some(AminoAcidPKa::new(9.60, None, 2.34)), + AminoAcid::Proline => Some(AminoAcidPKa::new(10.96, None, 1.99)), + AminoAcid::Serine => Some(AminoAcidPKa::new(9.15, None, 2.21)), + AminoAcid::Threonine => Some(AminoAcidPKa::new(9.62, None, 2.11)), + AminoAcid::Methionine => Some(AminoAcidPKa::new(9.21, None, 2.28)), + AminoAcid::Phenylalanine => Some(AminoAcidPKa::new(9.13, None, 1.83)), + AminoAcid::Tryptophan => Some(AminoAcidPKa::new(9.39, None, 2.38)), + AminoAcid::Valine => Some(AminoAcidPKa::new(9.62, None, 2.32)), + AminoAcid::Isoleucine => Some(AminoAcidPKa::new(9.68, None, 2.36)), + AminoAcid::Leucine => Some(AminoAcidPKa::new(9.60, None, 2.36)), + AminoAcid::Glutamine => Some(AminoAcidPKa::new(9.13, None, 2.17)), + AminoAcid::Asparagine => Some(AminoAcidPKa::new(8.80, None, 2.02)), _ => None, } } diff --git 
a/rustyms/src/identification/identified_peptide.rs b/rustyms/src/identification/identified_peptide.rs index 054ee678..cfa7beae 100644 --- a/rustyms/src/identification/identified_peptide.rs +++ b/rustyms/src/identification/identified_peptide.rs @@ -506,9 +506,9 @@ impl IdentifiedPeptide { precursor_mz: mz, .. }) | MetaData::MSFragger(MSFraggerData { mz, .. }) => Some(*mz), - MetaData::MZTab(MZTabData { mz, .. }) | MetaData::MaxQuant(MaxQuantData { mz, .. }) => { - *mz - } + MetaData::MZTab(MZTabData { mz, .. }) + | MetaData::MaxQuant(MaxQuantData { mz, .. }) + | MetaData::DeepNovoFamily(DeepNovoFamilyData { mz, .. }) => *mz, MetaData::Sage(SageData { mass, z, .. }) | MetaData::NovoB(NovoBData { mass, z, .. }) | MetaData::PLink(PLinkData { mass, z, .. }) => { @@ -516,8 +516,7 @@ impl IdentifiedPeptide { mass.value / (z.value as f64), )) } - MetaData::DeepNovoFamily(_) - | MetaData::Fasta(_) + MetaData::Fasta(_) | MetaData::SpectrumSequenceList(_) | MetaData::PowerNovo(_) | MetaData::PepNet(_) => None, From 107bc1319c807c07a827b29983b3bddbb2e21371 Mon Sep 17 00:00:00 2001 From: Auke Heerdink <21688542+aukeheerdink@users.noreply.github.com> Date: Tue, 1 Apr 2025 14:42:12 +0200 Subject: [PATCH 6/7] Refactored pka and added docs --- rustyms/src/aminoacid/is_amino_acid.rs | 65 ++++++++ .../is_amino_acid_impl.rs} | 68 +-------- rustyms/src/aminoacid/mod.rs | 4 + .../{aminoacid_pka.rs => aminoacid/pka.rs} | 143 +++++++++++++++++- .../properties.rs} | 0 rustyms/src/aminoacid_hydrophobicity.rs | 0 rustyms/src/checked_aminoacid.rs | 4 +- rustyms/src/element.rs | 2 +- rustyms/src/lib.rs | 7 +- 9 files changed, 218 insertions(+), 75 deletions(-) create mode 100644 rustyms/src/aminoacid/is_amino_acid.rs rename rustyms/src/{aminoacids.rs => aminoacid/is_amino_acid_impl.rs} (97%) create mode 100644 rustyms/src/aminoacid/mod.rs rename rustyms/src/{aminoacid_pka.rs => aminoacid/pka.rs} (64%) rename rustyms/src/{aminoacid_properties.rs => aminoacid/properties.rs} (100%) delete mode 
100644 rustyms/src/aminoacid_hydrophobicity.rs diff --git a/rustyms/src/aminoacid/is_amino_acid.rs b/rustyms/src/aminoacid/is_amino_acid.rs new file mode 100644 index 00000000..882660ab --- /dev/null +++ b/rustyms/src/aminoacid/is_amino_acid.rs @@ -0,0 +1,65 @@ +//! Module used create the [IsAminoAcid] trait + +use crate::{ + formula::MolecularFormula, system::Mass, MassMode, Multi, MultiChemical, NeutralLoss, + SequencePosition, +}; + +use std::borrow::Cow; + +/// A general trait to define amino acids. +pub trait IsAminoAcid: MultiChemical { + /// The full name for this amino acid. + fn name(&self) -> Cow<'_, str>; + /// The three letter code for this amino acid. Or None if there is no common three letter + /// definition for this amino acid. + fn three_letter_code(&self) -> Option>; + /// The one letter code for this amino acid. Or None if there is no common single character + /// definition for this amino acid. + #[doc(alias = "code")] + fn one_letter_code(&self) -> Option; + /// The ProForma definition for this amino acid. If this is not a simple amino acid it can be + /// defined as an amino acid with an additional modification. For example `X[H9C2N2]` could be + /// used if Arginine was not defined as `R` in ProForma. + fn pro_forma_definition(&self) -> Cow<'_, str>; + /// The monoisotopic mass of this amino acid. Should be redefined for better performance. + fn monoisotopic_mass(&self) -> Cow<'_, Multi> { + Cow::Owned( + self.formulas() + .iter() + .map(MolecularFormula::monoisotopic_mass) + .collect(), + ) + } + /// The average weight of this amino acid. Should be redefined for better performance. + fn average_weight(&self) -> Cow<'_, Multi> { + Cow::Owned( + self.formulas() + .iter() + .map(MolecularFormula::average_weight) + .collect(), + ) + } + /// The mass with a given mass mode for this amino acid. Should be redefined for better performance. 
+ fn mass(&self, mode: MassMode) -> Cow<'_, Multi> { + Cow::Owned(self.formulas().iter().map(|f| f.mass(mode)).collect()) + } + /// The molecular formula of the side chain of the amino acid. The `sequence_index` and + /// `peptidoform_index` are used to keep track of ambiguous amino acids. + fn side_chain( + &self, + sequence_index: SequencePosition, + peptidoform_index: usize, + ) -> Cow<'_, Multi>; + /// The molecular formulas that can fragment for satellite ions (d and w). Commonly the fragment + /// after the second carbon into the side chain. `MolecularFormula::default()` can be returned + /// if no satellite ions are possible. The `sequence_index` and `peptidoform_index` are used to + /// keep track of ambiguous amino acids. + fn satellite_ion_fragments( + &self, + sequence_index: SequencePosition, + peptidoform_index: usize, + ) -> Option>>; + /// Common neutral losses for the immonium ion of this amino acid. + fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]>; +} diff --git a/rustyms/src/aminoacids.rs b/rustyms/src/aminoacid/is_amino_acid_impl.rs similarity index 97% rename from rustyms/src/aminoacids.rs rename to rustyms/src/aminoacid/is_amino_acid_impl.rs index c81ad472..39877f04 100644 --- a/rustyms/src/aminoacids.rs +++ b/rustyms/src/aminoacid/is_amino_acid_impl.rs @@ -1,3 +1,7 @@ +//! Module used define the implementations for the [IsAminoAcid] trait + +use std::borrow::Cow; + use serde::{Deserialize, Serialize}; use crate::{ @@ -5,68 +9,10 @@ use crate::{ fragment::{Fragment, FragmentType, PeptidePosition}, model::*, molecular_charge::CachedCharge, - system::Mass, - MassMode, Multi, MultiChemical, NeutralLoss, SequencePosition, + Multi, MultiChemical, NeutralLoss, SequencePosition, }; -use std::borrow::Cow; - -/// A general trait to define amino acids. -pub trait IsAminoAcid: MultiChemical { - /// The full name for this amino acid. - fn name(&self) -> Cow<'_, str>; - /// The three letter code for this amino acid. 
Or None if there is no common three letter - /// definition for this amino acid. - fn three_letter_code(&self) -> Option>; - /// The one letter code for this amino acid. Or None if there is no common single character - /// definition for this amino acid. - #[doc(alias = "code")] - fn one_letter_code(&self) -> Option; - /// The ProForma definition for this amino acid. If this is not a simple amino acid it can be - /// defined as an amino acid with an additional modification. For example `X[H9C2N2]` could be - /// used if Arginine was not defined as `R` in ProForma. - fn pro_forma_definition(&self) -> Cow<'_, str>; - /// The monoisotopic mass of this amino acid. Should be redefined for better performance. - fn monoisotopic_mass(&self) -> Cow<'_, Multi> { - Cow::Owned( - self.formulas() - .iter() - .map(MolecularFormula::monoisotopic_mass) - .collect(), - ) - } - /// The average weight of this amino acid. Should be redefined for better performance. - fn average_weight(&self) -> Cow<'_, Multi> { - Cow::Owned( - self.formulas() - .iter() - .map(MolecularFormula::average_weight) - .collect(), - ) - } - /// The mass with a given mass mode for this amino acid. Should be redefined for better performance. - fn mass(&self, mode: MassMode) -> Cow<'_, Multi> { - Cow::Owned(self.formulas().iter().map(|f| f.mass(mode)).collect()) - } - /// The molecular formula of the side chain of the amino acid. The `sequence_index` and - /// `peptidoform_index` are used to keep track of ambiguous amino acids. - fn side_chain( - &self, - sequence_index: SequencePosition, - peptidoform_index: usize, - ) -> Cow<'_, Multi>; - /// The molecular formulas that can fragment for satellite ions (d and w). Commonly the fragment - /// after the second carbon into the side chain. `MolecularFormula::default()` can be returned - /// if no satellite ions are possible. The `sequence_index` and `peptidoform_index` are used to - /// keep track of ambiguous amino acids. 
- fn satellite_ion_fragments( - &self, - sequence_index: SequencePosition, - peptidoform_index: usize, - ) -> Option>>; - /// Common neutral losses for the immonium ion of this amino acid. - fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]>; -} +use super::is_amino_acid::IsAminoAcid; impl std::fmt::Display for dyn IsAminoAcid { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -74,7 +20,7 @@ impl std::fmt::Display for dyn IsAminoAcid { } } -include!("shared/aminoacid.rs"); +include!("../shared/aminoacid.rs"); impl std::fmt::Display for AminoAcid { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { diff --git a/rustyms/src/aminoacid/mod.rs b/rustyms/src/aminoacid/mod.rs new file mode 100644 index 00000000..ece76e4e --- /dev/null +++ b/rustyms/src/aminoacid/mod.rs @@ -0,0 +1,4 @@ +pub mod is_amino_acid; +pub mod is_amino_acid_impl; +pub mod pka; +pub mod properties; diff --git a/rustyms/src/aminoacid_pka.rs b/rustyms/src/aminoacid/pka.rs similarity index 64% rename from rustyms/src/aminoacid_pka.rs rename to rustyms/src/aminoacid/pka.rs index 5ece62c1..ff19fa1c 100644 --- a/rustyms/src/aminoacid_pka.rs +++ b/rustyms/src/aminoacid/pka.rs @@ -1,10 +1,14 @@ +//! Module used to store and calculate pKa and isoelectric point values for a given [AminoAcid] or [Peptidoform] respectively + use serde::{Deserialize, Serialize}; use crate::{ - aminoacid_properties::ChargeClass, aminoacids::IsAminoAcid, - modification::SimpleModificationInner, AminoAcid, AtMax, Peptidoform, SemiAmbiguous, + aminoacid::properties::ChargeClass, modification::SimpleModificationInner, AminoAcid, AtMax, + Peptidoform, SemiAmbiguous, }; +use super::is_amino_acid::IsAminoAcid; + /// A source for pKa values, which can be used to calculate the pKa for peptidoforms. pub trait PKaSource { /// Get the pKa values for the given amino acid and modifications. 
@@ -18,6 +22,27 @@ pub trait PKaSource { } impl> Peptidoform { + /// Get the calculated isoelectric point (pI) for the peptidoform, or None if any sequence elements lack pKa values. + /// + /// The isoelectric point is the pH at which the net charge of the peptidoform is zero. This is determined using a binary + /// search between pH 0 and 14. The charge at each pH is computed using the Henderson-Hasselbalch equation with pKa values + /// from the provided `PKaSource`, considering N-terminal, C-terminal, and sidechain ionizable groups. + /// + /// # Example + /// ```rust + /// # use rustyms::{Peptidoform, aminoacid::pka::{PKaSource, PKaLide1991}}; + /// // Create a SemiAmbiguous Peptidoform for glutamic acid (E) and Alanine (A) + /// let peptidoform = Peptidoform::pro_forma(&"EMEVEESPEK", None).unwrap().into_semi_ambiguous().unwrap(); + /// let pi = peptidoform.isoelectic_point::(); + /// // The calculated pI is approximately 3.57 (bacause it's rounded to 2 decimal points) based on Lide 1991 pKa values + /// assert_eq!(pi, Some(3.57)); + /// ``` + /// + /// # Shortcomings + /// - **Naive Approach**: Does not account for interactions between ionizable groups. + /// - **Modifications Ignored**: Modifications affecting pKa are not considered. + /// - **Environmental Factors**: Assumes pKa values are independent of sequence and environment. + /// /// Get the calculated pKa value for the given peptidoform, or None if any of the sequence elements do not have a defined pKa. 
#[allow(non_snake_case)] pub fn isoelectic_point>(&self) -> Option { @@ -29,8 +54,8 @@ impl> Peptidoform { // Collect all ionizable groups with their pKa values let mut ionizable = Vec::with_capacity(sequence.len() + 2); - // N-terminal - let first = sequence.first().unwrap(); + // Handle N-terminal + let first = sequence.first()?; ionizable.push(( ChargeClass::Positive, Source::pKa( @@ -42,8 +67,8 @@ impl> Peptidoform { .n_term(), )); // N-terminal is always positive - // C-terminal - let last = sequence.last().unwrap(); + // Handle C-terminal + let last = sequence.last()?; ionizable.push(( ChargeClass::Negative, Source::pKa( @@ -227,3 +252,109 @@ impl PKaSource for PKaLehninger { } } } + +#[cfg(test)] +#[expect(clippy::float_cmp, clippy::missing_panics_doc)] +mod tests { + use super::*; + use crate::{modification::SimpleModification, Peptidoform, SemiAmbiguous}; + + // Helper to create a Peptidoform from a list of amino acids + fn create_peptidoform(aas: &str) -> Peptidoform { + Peptidoform::pro_forma(aas, None) + .unwrap() + .into_semi_ambiguous() + .unwrap() + } + + // Helper function to test pKa values for a given source + fn test_pka>( + test_cases: &[(AminoAcid, Option<(f64, Option, f64)>)], + ) { + for (aa, maybe_values) in test_cases { + if let Some((n_term, sidechain, c_term)) = maybe_values { + let pka = Source::pKa( + *aa, + std::iter::empty::(), + None::>, + None::>, + ) + .unwrap_or_else(|| panic!("Missing pKa for {aa:?}")); + + assert_eq!(pka.n_term(), *n_term, "N-term mismatch for {aa:?}"); + assert_eq!(pka.sidechain(), *sidechain, "Sidechain mismatch for {aa:?}"); + assert_eq!(pka.c_term(), *c_term, "C-term mismatch for {aa:?}"); + } else { + assert!(maybe_values.is_none(), "Expected None for {aa:?}"); + } + } + } + + // Helper function to test an isoelectric point value given a source + fn test_isoelectric_point>(cases: &[(&str, Option)]) { + for &(seq, expected) in cases { + let peptide = create_peptidoform(seq); + let iso = 
peptide.isoelectic_point::(); + assert_eq!( + iso, expected, + "Isoelectric point mismatch for peptide: {seq}" + ); + } + } + + #[test] + fn test_pka_lide1991() { + let test_cases = [ + (AminoAcid::Arginine, Some((9.00, Some(12.10), 2.03))), + (AminoAcid::GlutamicAcid, Some((9.58, Some(4.15), 2.16))), + (AminoAcid::Alanine, Some((9.71, None, 2.33))), + (AminoAcid::Histidine, Some((9.09, Some(6.04), 1.70))), + (AminoAcid::Unknown, None), + ]; + + test_pka::(&test_cases); + } + + #[test] + fn test_pka_lehninger() { + let test_cases = [ + (AminoAcid::Cysteine, Some((10.28, Some(8.18), 1.96))), + (AminoAcid::AsparticAcid, Some((9.60, Some(3.65), 1.88))), + (AminoAcid::Isoleucine, Some((9.68, None, 2.36))), + (AminoAcid::Tryptophan, Some((9.39, None, 2.38))), + (AminoAcid::Selenocysteine, None), + ]; + + test_pka::(&test_cases); + } + + #[test] + fn test_isoelectric_point_lide1991() { + let test_cases = [ + ("E", Some(3.16)), + ("A", Some(6.02)), + ("DE", Some(2.85)), + ("HR", Some(10.6)), + ("KDEH", Some(5.17)), + ("AXRT", None), + ("AXRT[Oxidation]", None), + ]; + + test_isoelectric_point::(&test_cases); + } + + #[test] + fn test_isoelectric_point_lehninger() { + let test_cases = [ + ("G", Some(5.97)), + ("Y", Some(5.65)), + ("CQ", Some(6.23)), + ("KP", Some(9.74)), + ("FIVS", Some(5.67)), + ("TKLB", None), + ("TK[Oxidation]LB", None), + ]; + + test_isoelectric_point::(&test_cases); + } +} diff --git a/rustyms/src/aminoacid_properties.rs b/rustyms/src/aminoacid/properties.rs similarity index 100% rename from rustyms/src/aminoacid_properties.rs rename to rustyms/src/aminoacid/properties.rs diff --git a/rustyms/src/aminoacid_hydrophobicity.rs b/rustyms/src/aminoacid_hydrophobicity.rs deleted file mode 100644 index e69de29b..00000000 diff --git a/rustyms/src/checked_aminoacid.rs b/rustyms/src/checked_aminoacid.rs index 5d8036d1..34212152 100644 --- a/rustyms/src/checked_aminoacid.rs +++ b/rustyms/src/checked_aminoacid.rs @@ -3,8 +3,8 @@ use std::marker::PhantomData; use 
serde::{Deserialize, Serialize}; use crate::{ - aminoacids::IsAminoAcid, AminoAcid, Chemical, MolecularFormula, Multi, MultiChemical, - SemiAmbiguous, UnAmbiguous, + aminoacid::{is_amino_acid::IsAminoAcid, is_amino_acid_impl::AminoAcid}, + Chemical, MolecularFormula, Multi, MultiChemical, SemiAmbiguous, UnAmbiguous, }; /// A checked amino acid. This wraps an [`AminoAcid`] to keep track of the maximal complexity of diff --git a/rustyms/src/element.rs b/rustyms/src/element.rs index 015d8def..451b6af0 100644 --- a/rustyms/src/element.rs +++ b/rustyms/src/element.rs @@ -82,7 +82,7 @@ impl Element { let mut max = None; for iso in &elemental_data()[self as usize - 1].2 { let chance = iso.2 * f64::from(n); - if max.map_or(true, |m: (Mass, f64)| chance > m.1) { + if max.is_none_or(|m: (Mass, f64)| chance > m.1) { max = Some((iso.1, chance)); } } diff --git a/rustyms/src/lib.rs b/rustyms/src/lib.rs index 39bf7268..3d82ae49 100644 --- a/rustyms/src/lib.rs +++ b/rustyms/src/lib.rs @@ -34,10 +34,7 @@ mod formula; #[path = "shared/csv.rs"] pub mod csv; -pub mod aminoacid_hydrophobicity; -pub mod aminoacid_pka; -pub mod aminoacid_properties; -mod aminoacids; +pub mod aminoacid; mod checked_aminoacid; mod element; pub mod error; @@ -87,7 +84,7 @@ pub use crate::sequence_element::SequenceElement; pub use crate::sequence_position::*; pub use crate::spectrum::{AnnotatableSpectrum, AnnotatedSpectrum, RawSpectrum}; pub use crate::tolerance::*; -pub use aminoacids::{AminoAcid, IsAminoAcid}; +pub use aminoacid::{is_amino_acid::IsAminoAcid, is_amino_acid_impl::AminoAcid}; pub use checked_aminoacid::CheckedAminoAcid; pub use fragment::Fragment; pub use peptidoform::{CompoundPeptidoformIon, Peptidoform, PeptidoformIon}; From 229e5a803f242a558136cbf20bc077948fe8e581 Mon Sep 17 00:00:00 2001 From: Douwe Schulte Date: Tue, 1 Apr 2025 15:03:08 +0200 Subject: [PATCH 7/7] Small improvements --- rendered_glycans.html | 1 + .../{is_amino_acid_impl.rs => aminoacid.rs} | 0 
rustyms/src/aminoacid/mod.rs | 7 ++++-- rustyms/src/aminoacid/pka.rs | 23 ++++++++++++------- rustyms/src/checked_aminoacid.rs | 4 ++-- rustyms/src/lib.rs | 2 +- 6 files changed, 24 insertions(+), 13 deletions(-) create mode 100644 rendered_glycans.html rename rustyms/src/aminoacid/{is_amino_acid_impl.rs => aminoacid.rs} (100%) diff --git a/rendered_glycans.html b/rendered_glycans.html new file mode 100644 index 00000000..27c594fb --- /dev/null +++ b/rendered_glycans.html @@ -0,0 +1 @@ +Glycan render testpeppeppepSpeppep1pepfpeppeppeppeppepMepep2pepfDpepfpeppepopeppepDLpepLpepD3peppeppeppepNArg
  1. AcAcAc
  2. AcAc
  3. NAcNAc

\ No newline at end of file diff --git a/rustyms/src/aminoacid/is_amino_acid_impl.rs b/rustyms/src/aminoacid/aminoacid.rs similarity index 100% rename from rustyms/src/aminoacid/is_amino_acid_impl.rs rename to rustyms/src/aminoacid/aminoacid.rs diff --git a/rustyms/src/aminoacid/mod.rs b/rustyms/src/aminoacid/mod.rs index ece76e4e..63b0fada 100644 --- a/rustyms/src/aminoacid/mod.rs +++ b/rustyms/src/aminoacid/mod.rs @@ -1,4 +1,7 @@ -pub mod is_amino_acid; -pub mod is_amino_acid_impl; +mod aminoacid; +mod is_amino_acid; pub mod pka; pub mod properties; + +pub use aminoacid::*; +pub use is_amino_acid::*; diff --git a/rustyms/src/aminoacid/pka.rs b/rustyms/src/aminoacid/pka.rs index ff19fa1c..fda9090f 100644 --- a/rustyms/src/aminoacid/pka.rs +++ b/rustyms/src/aminoacid/pka.rs @@ -34,8 +34,8 @@ impl> Peptidoform { /// // Create a SemiAmbiguous Peptidoform for glutamic acid (E) and Alanine (A) /// let peptidoform = Peptidoform::pro_forma(&"EMEVEESPEK", None).unwrap().into_semi_ambiguous().unwrap(); /// let pi = peptidoform.isoelectic_point::(); - /// // The calculated pI is approximately 3.57 (bacause it's rounded to 2 decimal points) based on Lide 1991 pKa values - /// assert_eq!(pi, Some(3.57)); + /// // The calculated pI is approximately 3.57 based on Lide 1991 pKa values + /// assert_eq!(pi.map(|v| (v * 100.0).round() / 100.0), Some(3.57)); /// ``` /// /// # Shortcomings @@ -119,7 +119,7 @@ impl> Peptidoform { } } - Some((new_pi * 100.0).round() / 100.0) + Some(new_pi) } } @@ -280,10 +280,15 @@ mod tests { None::>, ) .unwrap_or_else(|| panic!("Missing pKa for {aa:?}")); - - assert_eq!(pka.n_term(), *n_term, "N-term mismatch for {aa:?}"); - assert_eq!(pka.sidechain(), *sidechain, "Sidechain mismatch for {aa:?}"); - assert_eq!(pka.c_term(), *c_term, "C-term mismatch for {aa:?}"); + let round = |v: f64| (v * 100.0).round() / 100.0; + + assert_eq!(round(pka.n_term()), *n_term, "N-term mismatch for {aa:?}"); + assert_eq!( + pka.sidechain().map(round), + *sidechain, + 
"Sidechain mismatch for {aa:?}" + ); + assert_eq!(round(pka.c_term()), *c_term, "C-term mismatch for {aa:?}"); } else { assert!(maybe_values.is_none(), "Expected None for {aa:?}"); } @@ -294,9 +299,11 @@ mod tests { fn test_isoelectric_point>(cases: &[(&str, Option)]) { for &(seq, expected) in cases { let peptide = create_peptidoform(seq); + let round = |v: f64| (v * 100.0).round() / 100.0; let iso = peptide.isoelectic_point::(); assert_eq!( - iso, expected, + iso.map(round), + expected, "Isoelectric point mismatch for peptide: {seq}" ); } diff --git a/rustyms/src/checked_aminoacid.rs b/rustyms/src/checked_aminoacid.rs index 34212152..c38a6240 100644 --- a/rustyms/src/checked_aminoacid.rs +++ b/rustyms/src/checked_aminoacid.rs @@ -3,8 +3,8 @@ use std::marker::PhantomData; use serde::{Deserialize, Serialize}; use crate::{ - aminoacid::{is_amino_acid::IsAminoAcid, is_amino_acid_impl::AminoAcid}, - Chemical, MolecularFormula, Multi, MultiChemical, SemiAmbiguous, UnAmbiguous, + AminoAcid, Chemical, IsAminoAcid, MolecularFormula, Multi, MultiChemical, SemiAmbiguous, + UnAmbiguous, }; /// A checked amino acid. This wraps an [`AminoAcid`] to keep track of the maximal complexity of diff --git a/rustyms/src/lib.rs b/rustyms/src/lib.rs index 3d82ae49..8814140b 100644 --- a/rustyms/src/lib.rs +++ b/rustyms/src/lib.rs @@ -84,7 +84,7 @@ pub use crate::sequence_element::SequenceElement; pub use crate::sequence_position::*; pub use crate::spectrum::{AnnotatableSpectrum, AnnotatedSpectrum, RawSpectrum}; pub use crate::tolerance::*; -pub use aminoacid::{is_amino_acid::IsAminoAcid, is_amino_acid_impl::AminoAcid}; +pub use aminoacid::{AminoAcid, IsAminoAcid}; pub use checked_aminoacid::CheckedAminoAcid; pub use fragment::Fragment; pub use peptidoform::{CompoundPeptidoformIon, Peptidoform, PeptidoformIon};