diff --git a/rendered_glycans.html b/rendered_glycans.html new file mode 100644 index 00000000..27c594fb --- /dev/null +++ b/rendered_glycans.html @@ -0,0 +1 @@ +Glycan render testpeppeppepSpeppep1pepfpeppeppeppeppepMepep2pepfDpepfpeppepopeppepDLpepLpepD3peppeppeppepNArg
  1. AcAcAc
  2. AcAc
  3. NAcNAc

\ No newline at end of file diff --git a/rustyms-generate-imgt/src/structs.rs b/rustyms-generate-imgt/src/structs.rs index 3f1bb907..f6901152 100644 --- a/rustyms-generate-imgt/src/structs.rs +++ b/rustyms-generate-imgt/src/structs.rs @@ -4,7 +4,7 @@ use std::str::FromStr; use crate::imgt_gene::IMGTGene; use crate::shared::{AnnotatedSequence, Gene, Species}; -use rustyms::AminoAcid; +use rustyms::{AminoAcid, IsAminoAcid}; #[derive(Debug)] pub struct DataItem { @@ -57,7 +57,12 @@ impl Display for Region { // self.found_seq.0, self.found_seq .as_ref() - .map(|seq| seq.1 .0.iter().map(|a| a.char()).collect::()) + .map(|seq| seq + .1 + .0 + .iter() + .map(|a| a.pro_forma_definition()) + .collect::()) .unwrap_or_else(|e| format!(": {e}")), ) } @@ -237,7 +242,10 @@ impl std::fmt::Debug for AASequence { write!( f, "[{}]", - self.0.iter().map(|a| a.char()).collect::() + self.0 + .iter() + .map(|a| a.pro_forma_definition()) + .collect::() ) } } diff --git a/rustyms-py/src/lib.rs b/rustyms-py/src/lib.rs index 13c23e5f..3d0822c2 100644 --- a/rustyms-py/src/lib.rs +++ b/rustyms-py/src/lib.rs @@ -6,7 +6,7 @@ use std::num::NonZeroU16; use ordered_float::OrderedFloat; use pyo3::{exceptions::PyValueError, prelude::*, types::PyType}; -use rustyms::{AnnotatableSpectrum, Chemical, Linked, MultiChemical}; +use rustyms::{AnnotatableSpectrum, Chemical, IsAminoAcid, Linked, MultiChemical}; /// Mass mode enum. 
#[pyclass(eq, eq_int)] @@ -440,7 +440,7 @@ impl AminoAcid { } fn __str__(&self) -> String { - self.0.char().to_string() + self.0.pro_forma_definition().to_string() } fn __repr__(&self) -> String { @@ -1124,7 +1124,7 @@ impl Peptidoform { self.0 .sequence() .iter() - .map(|x| x.aminoacid.char()) + .map(|x| x.aminoacid.pro_forma_definition()) .collect() } diff --git a/rustyms/src/align/multi_alignment.rs b/rustyms/src/align/multi_alignment.rs index 24e72a8f..6359de3d 100644 --- a/rustyms/src/align/multi_alignment.rs +++ b/rustyms/src/align/multi_alignment.rs @@ -36,7 +36,7 @@ impl MultiAlignmentLine<'_, Complexity> { { print!( "{}{}", - piece.1.aminoacid.char(), + piece.1.aminoacid, "·".repeat(piece.0.step as usize - 1) ); } diff --git a/rustyms/src/aminoacids.rs b/rustyms/src/aminoacid/aminoacid.rs similarity index 93% rename from rustyms/src/aminoacids.rs rename to rustyms/src/aminoacid/aminoacid.rs index 687ec960..39877f04 100644 --- a/rustyms/src/aminoacids.rs +++ b/rustyms/src/aminoacid/aminoacid.rs @@ -1,3 +1,7 @@ +//! Module used define the implementations for the [IsAminoAcid] trait + +use std::borrow::Cow; + use serde::{Deserialize, Serialize}; use crate::{ @@ -5,125 +9,209 @@ use crate::{ fragment::{Fragment, FragmentType, PeptidePosition}, model::*, molecular_charge::CachedCharge, - system::Mass, - MassMode, Multi, MultiChemical, NeutralLoss, SequencePosition, + Multi, MultiChemical, NeutralLoss, SequencePosition, }; -use std::borrow::Cow; +use super::is_amino_acid::IsAminoAcid; -/// A general trait to define amino acids. -pub trait IsAminoAcid { - /// The full name for this amino acid. - fn name(&self) -> Cow<'_, str>; - /// The three letter code for this amino acid. Or None if there is no common three letter - /// definition for this amino acid. - fn three_letter_code(&self) -> Option>; - /// The one letter code for this amino acid. Or None if there is no common single character - /// definition for this amino acid. 
- #[doc(alias = "code")] - fn one_letter_code(&self) -> Option; - /// The ProForma definition for this amino acid. If this is not a simple amino acid it can be - /// defined as an amino acid with an additional modification. For example `X[H9C2N2]` could be - /// used if Arginine was not defined as `R` in ProForma. - fn pro_forma_definition(&self) -> Cow<'_, str>; - /// The full molecular formula for this amino acid. It allows multiple molecular formulas to - /// allow ambiguous amino acids such as B and Z. - fn formulas(&self) -> Cow<'_, Multi>; - /// The monoisotopic mass of this amino acid. Should be redefined for better performance. - fn monoisotopic_mass(&self) -> Cow<'_, Multi> { - Cow::Owned( - self.formulas() - .iter() - .map(MolecularFormula::monoisotopic_mass) - .collect(), - ) - } - /// The average weight of this amino acid. Should be redefined for better performance. - fn average_weight(&self) -> Cow<'_, Multi> { - Cow::Owned( - self.formulas() - .iter() - .map(MolecularFormula::average_weight) - .collect(), - ) +impl std::fmt::Display for dyn IsAminoAcid { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.pro_forma_definition()) } - /// The mass with a given mass mode for this amino acid. Should be redefined for better performance. - fn mass(&self, mode: MassMode) -> Cow<'_, Multi> { - Cow::Owned(self.formulas().iter().map(|f| f.mass(mode)).collect()) +} + +include!("../shared/aminoacid.rs"); + +impl std::fmt::Display for AminoAcid { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.pro_forma_definition()) } - /// The molecular formula of the side chain of the amino acid. - fn side_chain(&self) -> Cow<'_, Multi>; - /// The molecular formulas that can fragment for satellite ions (d and w). Commonly the fragment - /// after the second carbon into the side chain. `MolecularFormula::default()` can be returned - /// if no satellite ions are possible. 
- fn satellite_ion_fragments(&self) -> Option>>; - /// Common neutral losses for the immonium ion of this amino acid. - fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]>; } -include!("shared/aminoacid.rs"); +impl IsAminoAcid for AminoAcid { + /// Get the single letter representation of the amino acid + fn one_letter_code(&self) -> Option { + Some(match self { + Self::Alanine => 'A', + Self::AmbiguousAsparagine => 'B', + Self::Cysteine => 'C', + Self::AsparticAcid => 'D', + Self::GlutamicAcid => 'E', + Self::Phenylalanine => 'F', + Self::Glycine => 'G', + Self::Histidine => 'H', + Self::Isoleucine => 'I', + Self::AmbiguousLeucine => 'J', + Self::Lysine => 'K', + Self::Leucine => 'L', + Self::Methionine => 'M', + Self::Asparagine => 'N', + Self::Pyrrolysine => 'O', + Self::Proline => 'P', + Self::Glutamine => 'Q', + Self::Arginine => 'R', + Self::Serine => 'S', + Self::Threonine => 'T', + Self::Selenocysteine => 'U', + Self::Valine => 'V', + Self::Tryptophan => 'W', + Self::Unknown => 'X', + Self::Tyrosine => 'Y', + Self::AmbiguousGlutamine => 'Z', + }) + } -impl AminoAcid { - /// All amino acids with a unique mass (no I/L in favour of J, no B, no Z, and no X) - pub const UNIQUE_MASS_AMINO_ACIDS: &'static [Self] = &[ - Self::Glycine, - Self::Alanine, - Self::Arginine, - Self::Asparagine, - Self::AsparticAcid, - Self::Cysteine, - Self::Glutamine, - Self::GlutamicAcid, - Self::Histidine, - Self::AmbiguousLeucine, - Self::Lysine, - Self::Methionine, - Self::Phenylalanine, - Self::Proline, - Self::Serine, - Self::Threonine, - Self::Tryptophan, - Self::Tyrosine, - Self::Valine, - Self::Selenocysteine, - Self::Pyrrolysine, - ]; + fn pro_forma_definition(&self) -> Cow<'_, str> { + Cow::Borrowed(match self { + Self::Alanine => "A", + Self::AmbiguousAsparagine => "B", + Self::Cysteine => "C", + Self::AsparticAcid => "D", + Self::GlutamicAcid => "E", + Self::Phenylalanine => "F", + Self::Glycine => "G", + Self::Histidine => "H", + Self::Isoleucine => "I", + 
Self::AmbiguousLeucine => "J", + Self::Lysine => "K", + Self::Leucine => "L", + Self::Methionine => "M", + Self::Asparagine => "N", + Self::Pyrrolysine => "O", + Self::Proline => "P", + Self::Glutamine => "Q", + Self::Arginine => "R", + Self::Serine => "S", + Self::Threonine => "T", + Self::Selenocysteine => "U", + Self::Valine => "V", + Self::Tryptophan => "W", + Self::Unknown => "X", + Self::Tyrosine => "Y", + Self::AmbiguousGlutamine => "Z", + }) + } - /// All 20 canonical amino acids - pub const CANONICAL_AMINO_ACIDS: &'static [Self] = &[ - Self::Glycine, - Self::Alanine, - Self::Arginine, - Self::Asparagine, - Self::AsparticAcid, - Self::Cysteine, - Self::Glutamine, - Self::GlutamicAcid, - Self::Histidine, - Self::Leucine, - Self::Isoleucine, - Self::Lysine, - Self::Methionine, - Self::Phenylalanine, - Self::Proline, - Self::Serine, - Self::Threonine, - Self::Tryptophan, - Self::Tyrosine, - Self::Valine, - ]; + /// Get the 3 letter code for the amino acid + fn three_letter_code(&self) -> Option> { + Some(Cow::Borrowed(match self { + Self::Alanine => "Ala", + Self::AmbiguousAsparagine => "Asx", + Self::Cysteine => "Cys", + Self::AsparticAcid => "Asp", + Self::GlutamicAcid => "Glu", + Self::Phenylalanine => "Phe", + Self::Glycine => "Gly", + Self::Histidine => "His", + Self::Isoleucine => "Ile", + Self::AmbiguousLeucine => "Xle", + Self::Lysine => "Lys", + Self::Leucine => "Leu", + Self::Methionine => "Met", + Self::Asparagine => "Asn", + Self::Pyrrolysine => "Pyl", + Self::Proline => "Pro", + Self::Glutamine => "Gln", + Self::Arginine => "Arg", + Self::Serine => "Ser", + Self::Threonine => "Thr", + Self::Selenocysteine => "Sec", + Self::Valine => "Val", + Self::Tryptophan => "Trp", + Self::Unknown => "Xaa", + Self::Tyrosine => "Tyr", + Self::AmbiguousGlutamine => "Glx", + })) + } + + /// Get the full name for the amino acid + fn name(&self) -> Cow<'_, str> { + Cow::Borrowed(match self { + Self::Alanine => "Alanine", + Self::AmbiguousAsparagine => 
"AmbiguousAsparagine", + Self::Cysteine => "Cysteine", + Self::AsparticAcid => "AsparticAcid", + Self::GlutamicAcid => "GlutamicAcid", + Self::Phenylalanine => "Phenylalanine", + Self::Glycine => "Glycine", + Self::Histidine => "Histidine", + Self::Isoleucine => "Isoleucine", + Self::AmbiguousLeucine => "AmbiguousLeucine", + Self::Lysine => "Lysine", + Self::Leucine => "Leucine", + Self::Methionine => "Methionine", + Self::Asparagine => "Asparagine", + Self::Pyrrolysine => "Pyrrolysine", + Self::Proline => "Proline", + Self::Glutamine => "Glutamine", + Self::Arginine => "Arginine", + Self::Serine => "Serine", + Self::Threonine => "Threonine", + Self::Selenocysteine => "Selenocysteine", + Self::Valine => "Valine", + Self::Tryptophan => "Tryptophan", + Self::Unknown => "Unknown", + Self::Tyrosine => "Tyrosine", + Self::AmbiguousGlutamine => "AmbiguousGlutamine", + }) + } + + fn side_chain( + &self, + sequence_index: SequencePosition, + peptidoform_index: usize, + ) -> Cow<'_, Multi> { + let crate::SequencePosition::Index(sequence_index) = sequence_index else { + return Cow::Owned(Multi::default()); + }; + Cow::Owned(match self { + Self::Alanine => molecular_formula!(H 3 C 1).into(), + Self::Arginine => molecular_formula!(H 10 C 4 N 3).into(), // One of the H's counts as the charge carrier and is added later + Self::Asparagine => molecular_formula!(H 4 C 2 O 1 N 1).into(), + Self::AsparticAcid => molecular_formula!(H 3 C 2 O 2).into(), + Self::AmbiguousAsparagine => vec![ + molecular_formula!(H 4 C 2 O 1 N 1 (crate::AmbiguousLabel::AminoAcid{option: Self::Asparagine, sequence_index, peptidoform_index})), + molecular_formula!(H 3 C 2 O 2 (crate::AmbiguousLabel::AminoAcid{option: Self::AsparticAcid, sequence_index, peptidoform_index})), + ] + .into(), + Self::Cysteine => molecular_formula!(H 3 C 1 S 1).into(), + Self::Glutamine => molecular_formula!(H 6 C 3 O 1 N 1).into(), + Self::GlutamicAcid => molecular_formula!(H 5 C 3 O 2).into(), + Self::AmbiguousGlutamine => 
vec![ + molecular_formula!(H 6 C 3 O 1 N 1 (crate::AmbiguousLabel::AminoAcid{option: Self::Glutamine, sequence_index, peptidoform_index})), + molecular_formula!(H 5 C 3 O 2 (crate::AmbiguousLabel::AminoAcid{option: Self::GlutamicAcid, sequence_index, peptidoform_index})), + ] + .into(), + Self::Glycine => molecular_formula!(H 1).into(), + Self::Histidine => molecular_formula!(H 5 C 4 N 2).into(), + Self::AmbiguousLeucine | Self::Isoleucine | Self::Leucine => { + molecular_formula!(H 9 C 4).into() + } + Self::Lysine => molecular_formula!(H 10 C 4 N 1).into(), + Self::Methionine => molecular_formula!(H 7 C 3 S 1).into(), + Self::Phenylalanine => molecular_formula!(H 7 C 7).into(), + Self::Proline => molecular_formula!(H 5 C 3).into(), + Self::Pyrrolysine => molecular_formula!(H 17 C 9 O 1 N 2).into(), + Self::Selenocysteine => molecular_formula!(H 3 C 1 Se 1).into(), + Self::Serine => molecular_formula!(H 3 C 1 O 1).into(), + Self::Threonine => molecular_formula!(H 5 C 2 O 1).into(), + Self::Tryptophan => molecular_formula!(H 8 C 9 N 1).into(), + Self::Tyrosine => molecular_formula!(H 7 C 7 O 1).into(), + Self::Valine => molecular_formula!(H 7 C 3).into(), + Self::Unknown => molecular_formula!().into(), + }) + } // TODO: Take side chain mutations into account (maybe define pyrrolysine as a mutation) - /// # Panics - /// When the sequence index is terminal. 
- pub(crate) fn satellite_ion_fragments( - self, + fn satellite_ion_fragments( + &self, sequence_index: SequencePosition, peptidoform_index: usize, - ) -> Multi { + ) -> Option>> { let crate::SequencePosition::Index(sequence_index) = sequence_index else { - panic!("Not allowed to call satellite ion fragments with a terminal sequence index") + return None; }; + Some(Cow::Owned( match self { Self::Alanine | Self::Glycine @@ -172,7 +260,7 @@ impl AminoAcid { ] .into(), Self::Valine => molecular_formula!(H 3 C 1).into(), // Technically two options, but both have the same mass - } + })) } /// All losses from the base immonium ions. Compiled from the sources below. @@ -251,9 +339,9 @@ impl AminoAcid { /// | | 55 | | | 55 | | 55 | | | | | 55.0548 | | | | | | 4 | 55.0548 | | 17.0263 | | H3N1 | | H3N1 | /// | | 44 | | | | | | | | | | | | | | | | 1 | 44 | | 28.0811 | | C1H2N1 | | C1H2N1 | /// | | | | | 41 | | 41 | | | | | 41.0391 | | | | | | 3 | 41.0391 | | 31.0420 | | C1H5N1 | | C1H5N1 | - fn immonium_losses(self) -> Vec { + fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]> { // TODO: For B/Z there are common immonium ions, but the mass is the same (meaning the loss is different), find a way of representing that - match self { + Cow::Owned(match self { Self::Arginine => vec![ NeutralLoss::Gain(molecular_formula!(C 2 O 2)), NeutralLoss::Loss(molecular_formula!(C 1 H 2)), @@ -321,8 +409,59 @@ impl AminoAcid { NeutralLoss::Loss(molecular_formula!(C 1 H 5 N 1)), ], _ => Vec::new(), - } + }) } +} + +impl AminoAcid { + /// All amino acids with a unique mass (no I/L in favour of J, no B, no Z, and no X) + pub const UNIQUE_MASS_AMINO_ACIDS: &'static [Self] = &[ + Self::Glycine, + Self::Alanine, + Self::Arginine, + Self::Asparagine, + Self::AsparticAcid, + Self::Cysteine, + Self::Glutamine, + Self::GlutamicAcid, + Self::Histidine, + Self::AmbiguousLeucine, + Self::Lysine, + Self::Methionine, + Self::Phenylalanine, + Self::Proline, + Self::Serine, + Self::Threonine, + 
Self::Tryptophan, + Self::Tyrosine, + Self::Valine, + Self::Selenocysteine, + Self::Pyrrolysine, + ]; + + /// All 20 canonical amino acids + pub const CANONICAL_AMINO_ACIDS: &'static [Self] = &[ + Self::Glycine, + Self::Alanine, + Self::Arginine, + Self::Asparagine, + Self::AsparticAcid, + Self::Cysteine, + Self::Glutamine, + Self::GlutamicAcid, + Self::Histidine, + Self::Leucine, + Self::Isoleucine, + Self::Lysine, + Self::Methionine, + Self::Phenylalanine, + Self::Proline, + Self::Serine, + Self::Threonine, + Self::Tryptophan, + Self::Tyrosine, + Self::Valine, + ]; // TODO: generalise over used storage type, so using molecularformula, monoisotopic mass, or average mass, also make sure that AAs can return these numbers in a const fashion #[expect(clippy::too_many_lines, clippy::too_many_arguments)] @@ -383,19 +522,23 @@ impl AminoAcid { )); } if ions.d.0 && allow_terminal.0 { - base_fragments.extend(Fragment::generate_all( - &(-self.satellite_ion_fragments(sequence_index, peptidoform_index) - * modifications - * self.formulas_inner(sequence_index, peptidoform_index) - + molecular_formula!(H 1 C 1 O 1)), - peptidoform_ion_index, - peptidoform_index, - &FragmentType::d(n_pos), - n_term, - ions.d.1, - charge_carriers, - ions.d.2, - )); + if let Some(satellite_ion_fragments) = + self.satellite_ion_fragments(sequence_index, peptidoform_index) + { + base_fragments.extend(Fragment::generate_all( + &(-satellite_ion_fragments.as_ref() + * modifications + * self.formulas_inner(sequence_index, peptidoform_index) + + molecular_formula!(H 1 C 1 O 1)), + peptidoform_ion_index, + peptidoform_index, + &FragmentType::d(n_pos), + n_term, + ions.d.1, + charge_carriers, + ions.d.2, + )); + } } if ions.v.0 && allow_terminal.1 { base_fragments.extend(Fragment::generate_all( @@ -410,19 +553,23 @@ impl AminoAcid { )); } if ions.w.0 && allow_terminal.1 { - base_fragments.extend(Fragment::generate_all( - &(-self.satellite_ion_fragments(sequence_index, peptidoform_index) - * modifications - 
* self.formulas_inner(sequence_index, peptidoform_index) - + molecular_formula!(H 2 N 1)), - peptidoform_ion_index, - peptidoform_index, - &FragmentType::w(c_pos), - c_term, - ions.w.1, - charge_carriers, - ions.w.2, - )); + if let Some(satellite_ion_fragments) = + self.satellite_ion_fragments(sequence_index, peptidoform_index) + { + base_fragments.extend(Fragment::generate_all( + &(-satellite_ion_fragments.as_ref() + * modifications + * self.formulas_inner(sequence_index, peptidoform_index) + + molecular_formula!(H 2 N 1)), + peptidoform_ion_index, + peptidoform_index, + &FragmentType::w(c_pos), + c_term, + ions.w.1, + charge_carriers, + ions.w.2, + )); + } } if ions.x.0 && allow_terminal.1 { base_fragments.extend(Fragment::generate_all( @@ -483,7 +630,7 @@ impl AminoAcid { peptidoform_index, &FragmentType::Immonium(n_pos, self.into()), // TODO: get the actual sequenceelement here &Multi::default(), - self.immonium_losses().as_slice(), + self.immonium_losses().as_ref(), charge_carriers, ions.immonium.1, )); @@ -491,102 +638,6 @@ impl AminoAcid { base_fragments } - /// Get the single letter representation of the amino acid - pub const fn char(self) -> char { - match self { - Self::Alanine => 'A', - Self::AmbiguousAsparagine => 'B', - Self::Cysteine => 'C', - Self::AsparticAcid => 'D', - Self::GlutamicAcid => 'E', - Self::Phenylalanine => 'F', - Self::Glycine => 'G', - Self::Histidine => 'H', - Self::Isoleucine => 'I', - Self::AmbiguousLeucine => 'J', - Self::Lysine => 'K', - Self::Leucine => 'L', - Self::Methionine => 'M', - Self::Asparagine => 'N', - Self::Pyrrolysine => 'O', - Self::Proline => 'P', - Self::Glutamine => 'Q', - Self::Arginine => 'R', - Self::Serine => 'S', - Self::Threonine => 'T', - Self::Selenocysteine => 'U', - Self::Valine => 'V', - Self::Tryptophan => 'W', - Self::Unknown => 'X', - Self::Tyrosine => 'Y', - Self::AmbiguousGlutamine => 'Z', - } - } - - /// Get the 3 letter code for the amino acid - pub const fn code(self) -> &'static str { - 
match self { - Self::Alanine => "Ala", - Self::AmbiguousAsparagine => "Asx", - Self::Cysteine => "Cys", - Self::AsparticAcid => "Asp", - Self::GlutamicAcid => "Glu", - Self::Phenylalanine => "Phe", - Self::Glycine => "Gly", - Self::Histidine => "His", - Self::Isoleucine => "Ile", - Self::AmbiguousLeucine => "Xle", - Self::Lysine => "Lys", - Self::Leucine => "Leu", - Self::Methionine => "Met", - Self::Asparagine => "Asn", - Self::Pyrrolysine => "Pyl", - Self::Proline => "Pro", - Self::Glutamine => "Gln", - Self::Arginine => "Arg", - Self::Serine => "Ser", - Self::Threonine => "Thr", - Self::Selenocysteine => "Sec", - Self::Valine => "Val", - Self::Tryptophan => "Trp", - Self::Unknown => "Xaa", - Self::Tyrosine => "Tyr", - Self::AmbiguousGlutamine => "Glx", - } - } - - /// Get the full name for the amino acid - pub const fn name(self) -> &'static str { - match self { - Self::Alanine => "Alanine", - Self::AmbiguousAsparagine => "AmbiguousAsparagine", - Self::Cysteine => "Cysteine", - Self::AsparticAcid => "AsparticAcid", - Self::GlutamicAcid => "GlutamicAcid", - Self::Phenylalanine => "Phenylalanine", - Self::Glycine => "Glycine", - Self::Histidine => "Histidine", - Self::Isoleucine => "Isoleucine", - Self::AmbiguousLeucine => "AmbiguousLeucine", - Self::Lysine => "Lysine", - Self::Leucine => "Leucine", - Self::Methionine => "Methionine", - Self::Asparagine => "Asparagine", - Self::Pyrrolysine => "Pyrrolysine", - Self::Proline => "Proline", - Self::Glutamine => "Glutamine", - Self::Arginine => "Arginine", - Self::Serine => "Serine", - Self::Threonine => "Threonine", - Self::Selenocysteine => "Selenocysteine", - Self::Valine => "Valine", - Self::Tryptophan => "Tryptophan", - Self::Unknown => "Unknown", - Self::Tyrosine => "Tyrosine", - Self::AmbiguousGlutamine => "AmbiguousGlutamine", - } - } - /// Check if two amino acids are considered identical. X is identical to anything, J to IL, B to ND, Z to EQ. 
pub(crate) fn canonical_identical(self, rhs: Self) -> bool { match (self, rhs) { @@ -604,12 +655,6 @@ impl AminoAcid { } } -impl std::fmt::Display for AminoAcid { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.char()) - } -} - #[cfg(test)] #[expect(clippy::unreadable_literal, clippy::missing_panics_doc)] mod tests { @@ -666,7 +711,7 @@ mod tests { ); println!( "{}: {} {} {} {}", - aa.char(), + aa.pro_forma_definition(), mono, mono_mass, weight, diff --git a/rustyms/src/aminoacid/is_amino_acid.rs b/rustyms/src/aminoacid/is_amino_acid.rs new file mode 100644 index 00000000..882660ab --- /dev/null +++ b/rustyms/src/aminoacid/is_amino_acid.rs @@ -0,0 +1,65 @@ +//! Module used create the [IsAminoAcid] trait + +use crate::{ + formula::MolecularFormula, system::Mass, MassMode, Multi, MultiChemical, NeutralLoss, + SequencePosition, +}; + +use std::borrow::Cow; + +/// A general trait to define amino acids. +pub trait IsAminoAcid: MultiChemical { + /// The full name for this amino acid. + fn name(&self) -> Cow<'_, str>; + /// The three letter code for this amino acid. Or None if there is no common three letter + /// definition for this amino acid. + fn three_letter_code(&self) -> Option>; + /// The one letter code for this amino acid. Or None if there is no common single character + /// definition for this amino acid. + #[doc(alias = "code")] + fn one_letter_code(&self) -> Option; + /// The ProForma definition for this amino acid. If this is not a simple amino acid it can be + /// defined as an amino acid with an additional modification. For example `X[H9C2N2]` could be + /// used if Arginine was not defined as `R` in ProForma. + fn pro_forma_definition(&self) -> Cow<'_, str>; + /// The monoisotopic mass of this amino acid. Should be redefined for better performance. 
+ fn monoisotopic_mass(&self) -> Cow<'_, Multi> { + Cow::Owned( + self.formulas() + .iter() + .map(MolecularFormula::monoisotopic_mass) + .collect(), + ) + } + /// The average weight of this amino acid. Should be redefined for better performance. + fn average_weight(&self) -> Cow<'_, Multi> { + Cow::Owned( + self.formulas() + .iter() + .map(MolecularFormula::average_weight) + .collect(), + ) + } + /// The mass with a given mass mode for this amino acid. Should be redefined for better performance. + fn mass(&self, mode: MassMode) -> Cow<'_, Multi> { + Cow::Owned(self.formulas().iter().map(|f| f.mass(mode)).collect()) + } + /// The molecular formula of the side chain of the amino acid. The `sequence_index` and + /// `peptidoform_index` are used to keep track of ambiguous amino acids. + fn side_chain( + &self, + sequence_index: SequencePosition, + peptidoform_index: usize, + ) -> Cow<'_, Multi>; + /// The molecular formulas that can fragment for satellite ions (d and w). Commonly the fragment + /// after the second carbon into the side chain. `MolecularFormula::default()` can be returned + /// if no satellite ions are possible. The `sequence_index` and `peptidoform_index` are used to + /// keep track of ambiguous amino acids. + fn satellite_ion_fragments( + &self, + sequence_index: SequencePosition, + peptidoform_index: usize, + ) -> Option>>; + /// Common neutral losses for the immonium ion of this amino acid. + fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]>; +} diff --git a/rustyms/src/aminoacid/mod.rs b/rustyms/src/aminoacid/mod.rs new file mode 100644 index 00000000..63b0fada --- /dev/null +++ b/rustyms/src/aminoacid/mod.rs @@ -0,0 +1,7 @@ +mod aminoacid; +mod is_amino_acid; +pub mod pka; +pub mod properties; + +pub use aminoacid::*; +pub use is_amino_acid::*; diff --git a/rustyms/src/aminoacid/pka.rs b/rustyms/src/aminoacid/pka.rs new file mode 100644 index 00000000..fda9090f --- /dev/null +++ b/rustyms/src/aminoacid/pka.rs @@ -0,0 +1,367 @@ +//! 
Module used to store and calculate pKa and isoelectric point values for a given [AminoAcid] or [Peptidoform] respectively + +use serde::{Deserialize, Serialize}; + +use crate::{ + aminoacid::properties::ChargeClass, modification::SimpleModificationInner, AminoAcid, AtMax, + Peptidoform, SemiAmbiguous, +}; + +use super::is_amino_acid::IsAminoAcid; + +/// A source for pKa values, which can be used to calculate the pKa for peptidoforms. +pub trait PKaSource { + /// Get the pKa values for the given amino acid and modifications. + #[allow(non_snake_case)] + fn pKa( + amino_acid: AA, + side_chain_modifications: impl Iterator>, + n_terminal_modifications: Option>>, + c_terminal_modifications: Option>>, + ) -> Option; +} + +impl> Peptidoform { + /// Get the calculated isoelectric point (pI) for the peptidoform, or None if any sequence elements lack pKa values. + /// + /// The isoelectric point is the pH at which the net charge of the peptidoform is zero. This is determined using a binary + /// search between pH 0 and 14. The charge at each pH is computed using the Henderson-Hasselbalch equation with pKa values + /// from the provided `PKaSource`, considering N-terminal, C-terminal, and sidechain ionizable groups. + /// + /// # Example + /// ```rust + /// # use rustyms::{Peptidoform, aminoacid::pka::{PKaSource, PKaLide1991}}; + /// // Create a SemiAmbiguous Peptidoform from a ProForma sequence + /// let peptidoform = Peptidoform::pro_forma(&"EMEVEESPEK", None).unwrap().into_semi_ambiguous().unwrap(); + /// let pi = peptidoform.isoelectic_point::(); + /// // The calculated pI is approximately 3.57 based on Lide 1991 pKa values + /// assert_eq!(pi.map(|v| (v * 100.0).round() / 100.0), Some(3.57)); + /// ``` + /// + /// # Shortcomings + /// - **Naive Approach**: Does not account for interactions between ionizable groups. + /// - **Modifications Ignored**: Modifications affecting pKa are not considered.
+ /// - **Environmental Factors**: Assumes pKa values are independent of sequence and environment. + /// + /// Returns `None` for an empty peptidoform, when any sequence element lacks a defined pKa, or when a residue with a sidechain pKa has an unknown charge class. + #[allow(non_snake_case)] + pub fn isoelectic_point>(&self) -> Option { + let sequence = self.sequence(); + if sequence.is_empty() { + return None; + } + + // Collect all ionizable groups with their pKa values + let mut ionizable = Vec::with_capacity(sequence.len() + 2); + + // Handle N-terminal + let first = sequence.first()?; + ionizable.push(( + ChargeClass::Positive, + Source::pKa( + first.aminoacid.aminoacid(), + first.modifications.iter().filter_map(|m| m.simple()), + Some(self.get_n_term().iter().filter_map(|m| m.simple())), + (self.len() == 1).then_some(self.get_c_term().iter().filter_map(|m| m.simple())), + )? + .n_term(), + )); // N-terminal is always positive + + // Handle C-terminal + let last = sequence.last()?; + ionizable.push(( + ChargeClass::Negative, + Source::pKa( + last.aminoacid.aminoacid(), + last.modifications.iter().filter_map(|m| m.simple()), + (self.len() == 1).then_some(self.get_n_term().iter().filter_map(|m| m.simple())), + Some(self.get_c_term().iter().filter_map(|m| m.simple())), + )? + .c_term(), + )); // C-terminal is always negative + + // Handle sidechains + for (index, aa) in sequence.iter().enumerate() { + if let Some(sidechain) = Source::pKa( + aa.aminoacid.aminoacid(), + aa.modifications.iter().filter_map(|m| m.simple()), + (index == 0).then_some(self.get_n_term().iter().filter_map(|m| m.simple())), + (index == self.len() - 1) + .then_some(self.get_c_term().iter().filter_map(|m| m.simple())), + )? 
+ .sidechain() + { + let charge_class = aa.aminoacid.aminoacid().charge_class(); + match charge_class { + ChargeClass::Positive | ChargeClass::Negative => { + ionizable.push((charge_class, sidechain)); + } + ChargeClass::Unknown => return None, + ChargeClass::Uncharged => (), + } + } + } + + // Binary search between pH 0-14 to find isoelectric point + let mut low = 0.0; + let mut high = 14.0; + let mut new_pi = 7.775; + const EPSILON: f64 = 0.0001; + + while (high - low) > EPSILON { + new_pi = (low + high) / 2.0; + let charge = calculate_charge(new_pi, &ionizable); + + if charge > 0.0 { + low = new_pi; + } else { + high = new_pi; + } + } + + Some(new_pi) + } +} + +fn calculate_charge(pH: f64, ionizable: &[(ChargeClass, f64)]) -> f64 { + let mut charge = 0.0; + + for (class, pka) in ionizable { + match class { + ChargeClass::Positive => charge += 1.0 / (10.0_f64.powf(pH - pka) + 1.0), + ChargeClass::Negative => charge -= 1.0 / (10.0_f64.powf(pka - pH) + 1.0), + _ => {} + } + } + + charge +} +/// The pKa for a specific Amino Acid +#[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Serialize, Deserialize)] +pub struct AminoAcidPKa { + n_term: f64, + sidechain: Option, + c_term: f64, +} + +impl AminoAcidPKa { + const fn new(n_term: f64, sidechain: Option, c_term: f64) -> Self { + Self { + n_term, + sidechain, + c_term, + } + } + + /// Get the pKa value for the n-term of the Amino acid + pub const fn n_term(self) -> f64 { + self.n_term + } + + /// Get the pKa value for the side-chain group of the Amino acid + pub const fn sidechain(self) -> Option { + self.sidechain + } + + /// Get the pKa value for the c-term of the Amino acid + pub const fn c_term(self) -> f64 { + self.c_term + } +} + +/// pKa values from Lide, D. R. (1991). Handbook of Chemistry and Physics: A Ready Reference Book of Chemical and Physical Data.
+pub struct PKaLide1991; + +impl PKaSource for PKaLide1991 { + fn pKa( + amino_acid: AminoAcid, + mut side_chain_modifications: impl Iterator>, + n_terminal_modifications: Option>>, + c_terminal_modifications: Option>>, + ) -> Option { + if side_chain_modifications.next().is_some() + || n_terminal_modifications.is_some_and(|mut m| m.next().is_some()) + || c_terminal_modifications.is_some_and(|mut m| m.next().is_some()) + { + return None; + } + match amino_acid { + AminoAcid::Arginine => Some(AminoAcidPKa::new(9.00, Some(12.10), 2.03)), + AminoAcid::Histidine => Some(AminoAcidPKa::new(9.09, Some(6.04), 1.70)), + AminoAcid::Lysine => Some(AminoAcidPKa::new(9.16, Some(10.67), 2.15)), + AminoAcid::AsparticAcid => Some(AminoAcidPKa::new(9.66, Some(3.71), 1.95)), + AminoAcid::GlutamicAcid => Some(AminoAcidPKa::new(9.58, Some(4.15), 2.16)), + AminoAcid::Tyrosine => Some(AminoAcidPKa::new(9.04, Some(10.10), 2.24)), + AminoAcid::Cysteine => Some(AminoAcidPKa::new(10.28, Some(8.14), 1.91)), + AminoAcid::Alanine => Some(AminoAcidPKa::new(9.71, None, 2.33)), + AminoAcid::Glycine => Some(AminoAcidPKa::new(9.58, None, 2.34)), + AminoAcid::Proline => Some(AminoAcidPKa::new(10.47, None, 1.95)), + AminoAcid::Serine => Some(AminoAcidPKa::new(9.05, None, 2.13)), + AminoAcid::Threonine => Some(AminoAcidPKa::new(8.96, None, 2.20)), + AminoAcid::Methionine => Some(AminoAcidPKa::new(9.08, None, 2.16)), + AminoAcid::Phenylalanine => Some(AminoAcidPKa::new(9.09, None, 2.18)), + AminoAcid::Tryptophan => Some(AminoAcidPKa::new(9.34, None, 2.38)), + AminoAcid::Valine => Some(AminoAcidPKa::new(9.52, None, 2.27)), + AminoAcid::Isoleucine => Some(AminoAcidPKa::new(9.60, None, 2.26)), + AminoAcid::Leucine => Some(AminoAcidPKa::new(9.58, None, 2.32)), + AminoAcid::Glutamine => Some(AminoAcidPKa::new(9.00, None, 2.18)), + AminoAcid::Asparagine => Some(AminoAcidPKa::new(8.73, None, 2.16)), + _ => None, + } + } +} + +/// pKa values from Lehninger, A. L., Nelson, D. L., & Cox, M. M. (2005). 
Lehninger Principles of Biochemistry. Macmillan. +pub struct PKaLehninger; + +impl PKaSource for PKaLehninger { + fn pKa( + amino_acid: AminoAcid, + mut side_chain_modifications: impl Iterator>, + n_terminal_modifications: Option>>, + c_terminal_modifications: Option>>, + ) -> Option { + if side_chain_modifications.next().is_some() + || n_terminal_modifications.is_some_and(|mut m| m.next().is_some()) + || c_terminal_modifications.is_some_and(|mut m| m.next().is_some()) + { + return None; + } + match amino_acid { + AminoAcid::Arginine => Some(AminoAcidPKa::new(9.04, Some(12.48), 2.17)), + AminoAcid::Histidine => Some(AminoAcidPKa::new(9.17, Some(6.00), 1.82)), + AminoAcid::Lysine => Some(AminoAcidPKa::new(8.95, Some(10.53), 2.18)), + AminoAcid::AsparticAcid => Some(AminoAcidPKa::new(9.60, Some(3.65), 1.88)), + AminoAcid::GlutamicAcid => Some(AminoAcidPKa::new(9.67, Some(4.25), 2.19)), + AminoAcid::Tyrosine => Some(AminoAcidPKa::new(9.11, Some(10.07), 2.20)), + AminoAcid::Cysteine => Some(AminoAcidPKa::new(10.28, Some(8.18), 1.96)), + AminoAcid::Alanine => Some(AminoAcidPKa::new(9.69, None, 2.34)), + AminoAcid::Glycine => Some(AminoAcidPKa::new(9.60, None, 2.34)), + AminoAcid::Proline => Some(AminoAcidPKa::new(10.96, None, 1.99)), + AminoAcid::Serine => Some(AminoAcidPKa::new(9.15, None, 2.21)), + AminoAcid::Threonine => Some(AminoAcidPKa::new(9.62, None, 2.11)), + AminoAcid::Methionine => Some(AminoAcidPKa::new(9.21, None, 2.28)), + AminoAcid::Phenylalanine => Some(AminoAcidPKa::new(9.13, None, 1.83)), + AminoAcid::Tryptophan => Some(AminoAcidPKa::new(9.39, None, 2.38)), + AminoAcid::Valine => Some(AminoAcidPKa::new(9.62, None, 2.32)), + AminoAcid::Isoleucine => Some(AminoAcidPKa::new(9.68, None, 2.36)), + AminoAcid::Leucine => Some(AminoAcidPKa::new(9.60, None, 2.36)), + AminoAcid::Glutamine => Some(AminoAcidPKa::new(9.13, None, 2.17)), + AminoAcid::Asparagine => Some(AminoAcidPKa::new(8.80, None, 2.02)), + _ => None, + } + } +} + +#[cfg(test)] 
+#[expect(clippy::float_cmp, clippy::missing_panics_doc)] +mod tests { + use super::*; + use crate::{modification::SimpleModification, Peptidoform, SemiAmbiguous}; + + // Helper to create a Peptidoform from a list of amino acids + fn create_peptidoform(aas: &str) -> Peptidoform { + Peptidoform::pro_forma(aas, None) + .unwrap() + .into_semi_ambiguous() + .unwrap() + } + + // Helper function to test pKa values for a given source + fn test_pka>( + test_cases: &[(AminoAcid, Option<(f64, Option, f64)>)], + ) { + for (aa, maybe_values) in test_cases { + if let Some((n_term, sidechain, c_term)) = maybe_values { + let pka = Source::pKa( + *aa, + std::iter::empty::(), + None::>, + None::>, + ) + .unwrap_or_else(|| panic!("Missing pKa for {aa:?}")); + let round = |v: f64| (v * 100.0).round() / 100.0; + + assert_eq!(round(pka.n_term()), *n_term, "N-term mismatch for {aa:?}"); + assert_eq!( + pka.sidechain().map(round), + *sidechain, + "Sidechain mismatch for {aa:?}" + ); + assert_eq!(round(pka.c_term()), *c_term, "C-term mismatch for {aa:?}"); + } else { + assert!(maybe_values.is_none(), "Expected None for {aa:?}"); + } + } + } + + // Helper function to test an isoelectric point value given a source + fn test_isoelectric_point>(cases: &[(&str, Option)]) { + for &(seq, expected) in cases { + let peptide = create_peptidoform(seq); + let round = |v: f64| (v * 100.0).round() / 100.0; + let iso = peptide.isoelectic_point::(); + assert_eq!( + iso.map(round), + expected, + "Isoelectric point mismatch for peptide: {seq}" + ); + } + } + + #[test] + fn test_pka_lide1991() { + let test_cases = [ + (AminoAcid::Arginine, Some((9.00, Some(12.10), 2.03))), + (AminoAcid::GlutamicAcid, Some((9.58, Some(4.15), 2.16))), + (AminoAcid::Alanine, Some((9.71, None, 2.33))), + (AminoAcid::Histidine, Some((9.09, Some(6.04), 1.70))), + (AminoAcid::Unknown, None), + ]; + + test_pka::(&test_cases); + } + + #[test] + fn test_pka_lehninger() { + let test_cases = [ + (AminoAcid::Cysteine, Some((10.28, 
Some(8.18), 1.96))), + (AminoAcid::AsparticAcid, Some((9.60, Some(3.65), 1.88))), + (AminoAcid::Isoleucine, Some((9.68, None, 2.36))), + (AminoAcid::Tryptophan, Some((9.39, None, 2.38))), + (AminoAcid::Selenocysteine, None), + ]; + + test_pka::(&test_cases); + } + + #[test] + fn test_isoelectric_point_lide1991() { + let test_cases = [ + ("E", Some(3.16)), + ("A", Some(6.02)), + ("DE", Some(2.85)), + ("HR", Some(10.6)), + ("KDEH", Some(5.17)), + ("AXRT", None), + ("AXRT[Oxidation]", None), + ]; + + test_isoelectric_point::(&test_cases); + } + + #[test] + fn test_isoelectric_point_lehninger() { + let test_cases = [ + ("G", Some(5.97)), + ("Y", Some(5.65)), + ("CQ", Some(6.23)), + ("KP", Some(9.74)), + ("FIVS", Some(5.67)), + ("TKLB", None), + ("TK[Oxidation]LB", None), + ]; + + test_isoelectric_point::(&test_cases); + } +} diff --git a/rustyms/src/aminoacid_properties.rs b/rustyms/src/aminoacid/properties.rs similarity index 100% rename from rustyms/src/aminoacid_properties.rs rename to rustyms/src/aminoacid/properties.rs diff --git a/rustyms/src/checked_aminoacid.rs b/rustyms/src/checked_aminoacid.rs index f709c828..c38a6240 100644 --- a/rustyms/src/checked_aminoacid.rs +++ b/rustyms/src/checked_aminoacid.rs @@ -3,7 +3,8 @@ use std::marker::PhantomData; use serde::{Deserialize, Serialize}; use crate::{ - AminoAcid, Chemical, MolecularFormula, Multi, MultiChemical, SemiAmbiguous, UnAmbiguous, + AminoAcid, Chemical, IsAminoAcid, MolecularFormula, Multi, MultiChemical, SemiAmbiguous, + UnAmbiguous, }; /// A checked amino acid. 
This wraps an [`AminoAcid`] to keep track of the maximal complexity of @@ -279,24 +280,48 @@ impl CheckedAminoAcid { self.aminoacid.canonical_identical(rhs.aminoacid) } - /// Get the description of the amino acid as a single character - pub const fn char(self) -> char { - self.aminoacid.char() + /// Get the underlying (unchecked) amino acid + pub const fn aminoacid(self) -> AminoAcid { + self.aminoacid } +} - /// Get the 3 letter code for the amino acid - pub const fn code(self) -> &'static str { - self.aminoacid.code() +impl IsAminoAcid for CheckedAminoAcid { + fn name(&self) -> std::borrow::Cow<'_, str> { + self.aminoacid.name() } - /// Get the full name of the amino acid - pub const fn name(self) -> &'static str { - self.aminoacid.name() + fn three_letter_code(&self) -> Option> { + self.aminoacid.three_letter_code() } - /// Get the underlying (unchecked) amino acid - pub const fn aminoacid(self) -> AminoAcid { + fn one_letter_code(&self) -> Option { + self.aminoacid.one_letter_code() + } + + fn pro_forma_definition(&self) -> std::borrow::Cow<'_, str> { + self.aminoacid.pro_forma_definition() + } + + fn immonium_losses(&self) -> std::borrow::Cow<'_, [crate::NeutralLoss]> { + self.aminoacid.immonium_losses() + } + + fn satellite_ion_fragments( + &self, + sequence_index: crate::SequencePosition, + peptidoform_index: usize, + ) -> Option>> { self.aminoacid + .satellite_ion_fragments(sequence_index, peptidoform_index) + } + + fn side_chain( + &self, + sequence_index: crate::SequencePosition, + peptidoform_index: usize, + ) -> std::borrow::Cow<'_, Multi> { + self.aminoacid.side_chain(sequence_index, peptidoform_index) } } @@ -398,7 +423,7 @@ impl Default for CheckedAminoAcid { impl std::fmt::Display for CheckedAminoAcid { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.char()) + write!(f, "{}", self.pro_forma_definition()) } } diff --git a/rustyms/src/element.rs b/rustyms/src/element.rs index 015d8def..451b6af0 100644 --- 
a/rustyms/src/element.rs +++ b/rustyms/src/element.rs @@ -82,7 +82,7 @@ impl Element { let mut max = None; for iso in &elemental_data()[self as usize - 1].2 { let chance = iso.2 * f64::from(n); - if max.map_or(true, |m: (Mass, f64)| chance > m.1) { + if max.is_none_or(|m: (Mass, f64)| chance > m.1) { max = Some((iso.1, chance)); } } diff --git a/rustyms/src/fragment.rs b/rustyms/src/fragment.rs index cfd67b9a..890965ad 100644 --- a/rustyms/src/fragment.rs +++ b/rustyms/src/fragment.rs @@ -607,9 +607,7 @@ impl FragmentType { Self::z(_) => Cow::Borrowed("z"), Self::z·(_) => Cow::Borrowed("z·"), Self::Y(_) | Self::YComposition(_, _) => Cow::Borrowed("Y"), - Self::Diagnostic(DiagnosticPosition::Peptide(_, aa)) => { - Cow::Owned(format!("d{}", aa.char())) - } + Self::Diagnostic(DiagnosticPosition::Peptide(_, aa)) => Cow::Owned(format!("d{aa}")), Self::Diagnostic(DiagnosticPosition::Reporter) => Cow::Borrowed("r"), Self::Diagnostic(DiagnosticPosition::Labile(m)) => Cow::Owned(format!("d{m}")), Self::Diagnostic( diff --git a/rustyms/src/identification/identified_peptide.rs b/rustyms/src/identification/identified_peptide.rs index 054ee678..cfa7beae 100644 --- a/rustyms/src/identification/identified_peptide.rs +++ b/rustyms/src/identification/identified_peptide.rs @@ -506,9 +506,9 @@ impl IdentifiedPeptide { precursor_mz: mz, .. }) | MetaData::MSFragger(MSFraggerData { mz, .. }) => Some(*mz), - MetaData::MZTab(MZTabData { mz, .. }) | MetaData::MaxQuant(MaxQuantData { mz, .. }) => { - *mz - } + MetaData::MZTab(MZTabData { mz, .. }) + | MetaData::MaxQuant(MaxQuantData { mz, .. }) + | MetaData::DeepNovoFamily(DeepNovoFamilyData { mz, .. }) => *mz, MetaData::Sage(SageData { mass, z, .. }) | MetaData::NovoB(NovoBData { mass, z, .. }) | MetaData::PLink(PLinkData { mass, z, .. 
}) => { @@ -516,8 +516,7 @@ impl IdentifiedPeptide { mass.value / (z.value as f64), )) } - MetaData::DeepNovoFamily(_) - | MetaData::Fasta(_) + MetaData::Fasta(_) | MetaData::SpectrumSequenceList(_) | MetaData::PowerNovo(_) | MetaData::PepNet(_) => None, diff --git a/rustyms/src/lib.rs b/rustyms/src/lib.rs index 96d410f9..8814140b 100644 --- a/rustyms/src/lib.rs +++ b/rustyms/src/lib.rs @@ -34,8 +34,7 @@ mod formula; #[path = "shared/csv.rs"] pub mod csv; -pub mod aminoacid_properties; -mod aminoacids; +pub mod aminoacid; mod checked_aminoacid; mod element; pub mod error; @@ -85,7 +84,7 @@ pub use crate::sequence_element::SequenceElement; pub use crate::sequence_position::*; pub use crate::spectrum::{AnnotatableSpectrum, AnnotatedSpectrum, RawSpectrum}; pub use crate::tolerance::*; -pub use aminoacids::AminoAcid; +pub use aminoacid::{AminoAcid, IsAminoAcid}; pub use checked_aminoacid::CheckedAminoAcid; pub use fragment::Fragment; pub use peptidoform::{CompoundPeptidoformIon, Peptidoform, PeptidoformIon}; diff --git a/rustyms/src/sequence_element.rs b/rustyms/src/sequence_element.rs index e53ce32a..9c843973 100644 --- a/rustyms/src/sequence_element.rs +++ b/rustyms/src/sequence_element.rs @@ -117,7 +117,7 @@ impl SequenceElement { if self.ambiguous.is_some() && last_ambiguous != self.ambiguous { write!(f, "(?")?; } - write!(f, "{}", self.aminoacid.char())?; + write!(f, "{}", self.aminoacid)?; for m in &self.modifications { let mut display_ambiguous = false; if let Modification::Ambiguous { id, .. } = m {