diff --git a/rendered_glycans.html b/rendered_glycans.html
new file mode 100644
index 00000000..27c594fb
--- /dev/null
+++ b/rendered_glycans.html
@@ -0,0 +1 @@
+
Glycan render test










































- AcAcAc
- AcAc
- NAcNAc
\ No newline at end of file
diff --git a/rustyms-generate-imgt/src/structs.rs b/rustyms-generate-imgt/src/structs.rs
index 3f1bb907..f6901152 100644
--- a/rustyms-generate-imgt/src/structs.rs
+++ b/rustyms-generate-imgt/src/structs.rs
@@ -4,7 +4,7 @@ use std::str::FromStr;
use crate::imgt_gene::IMGTGene;
use crate::shared::{AnnotatedSequence, Gene, Species};
-use rustyms::AminoAcid;
+use rustyms::{AminoAcid, IsAminoAcid};
#[derive(Debug)]
pub struct DataItem {
@@ -57,7 +57,12 @@ impl Display for Region {
// self.found_seq.0,
self.found_seq
.as_ref()
- .map(|seq| seq.1 .0.iter().map(|a| a.char()).collect::())
+ .map(|seq| seq
+ .1
+ .0
+ .iter()
+ .map(|a| a.pro_forma_definition())
+ .collect::())
.unwrap_or_else(|e| format!(": {e}")),
)
}
@@ -237,7 +242,10 @@ impl std::fmt::Debug for AASequence {
write!(
f,
"[{}]",
- self.0.iter().map(|a| a.char()).collect::()
+ self.0
+ .iter()
+ .map(|a| a.pro_forma_definition())
+ .collect::()
)
}
}
diff --git a/rustyms-py/src/lib.rs b/rustyms-py/src/lib.rs
index 13c23e5f..3d0822c2 100644
--- a/rustyms-py/src/lib.rs
+++ b/rustyms-py/src/lib.rs
@@ -6,7 +6,7 @@ use std::num::NonZeroU16;
use ordered_float::OrderedFloat;
use pyo3::{exceptions::PyValueError, prelude::*, types::PyType};
-use rustyms::{AnnotatableSpectrum, Chemical, Linked, MultiChemical};
+use rustyms::{AnnotatableSpectrum, Chemical, IsAminoAcid, Linked, MultiChemical};
/// Mass mode enum.
#[pyclass(eq, eq_int)]
@@ -440,7 +440,7 @@ impl AminoAcid {
}
fn __str__(&self) -> String {
- self.0.char().to_string()
+ self.0.pro_forma_definition().to_string()
}
fn __repr__(&self) -> String {
@@ -1124,7 +1124,7 @@ impl Peptidoform {
self.0
.sequence()
.iter()
- .map(|x| x.aminoacid.char())
+ .map(|x| x.aminoacid.pro_forma_definition())
.collect()
}
diff --git a/rustyms/src/align/multi_alignment.rs b/rustyms/src/align/multi_alignment.rs
index 24e72a8f..6359de3d 100644
--- a/rustyms/src/align/multi_alignment.rs
+++ b/rustyms/src/align/multi_alignment.rs
@@ -36,7 +36,7 @@ impl MultiAlignmentLine<'_, Complexity> {
{
print!(
"{}{}",
- piece.1.aminoacid.char(),
+ piece.1.aminoacid,
"·".repeat(piece.0.step as usize - 1)
);
}
diff --git a/rustyms/src/aminoacids.rs b/rustyms/src/aminoacid/aminoacid.rs
similarity index 93%
rename from rustyms/src/aminoacids.rs
rename to rustyms/src/aminoacid/aminoacid.rs
index 687ec960..39877f04 100644
--- a/rustyms/src/aminoacids.rs
+++ b/rustyms/src/aminoacid/aminoacid.rs
@@ -1,3 +1,7 @@
+//! Module used to define the implementations of the [IsAminoAcid] trait
+
+use std::borrow::Cow;
+
use serde::{Deserialize, Serialize};
use crate::{
@@ -5,125 +9,209 @@ use crate::{
fragment::{Fragment, FragmentType, PeptidePosition},
model::*,
molecular_charge::CachedCharge,
- system::Mass,
- MassMode, Multi, MultiChemical, NeutralLoss, SequencePosition,
+ Multi, MultiChemical, NeutralLoss, SequencePosition,
};
-use std::borrow::Cow;
+use super::is_amino_acid::IsAminoAcid;
-/// A general trait to define amino acids.
-pub trait IsAminoAcid {
- /// The full name for this amino acid.
- fn name(&self) -> Cow<'_, str>;
- /// The three letter code for this amino acid. Or None if there is no common three letter
- /// definition for this amino acid.
- fn three_letter_code(&self) -> Option>;
- /// The one letter code for this amino acid. Or None if there is no common single character
- /// definition for this amino acid.
- #[doc(alias = "code")]
- fn one_letter_code(&self) -> Option;
- /// The ProForma definition for this amino acid. If this is not a simple amino acid it can be
- /// defined as an amino acid with an additional modification. For example `X[H9C2N2]` could be
- /// used if Arginine was not defined as `R` in ProForma.
- fn pro_forma_definition(&self) -> Cow<'_, str>;
- /// The full molecular formula for this amino acid. It allows multiple molecular formulas to
- /// allow ambiguous amino acids such as B and Z.
- fn formulas(&self) -> Cow<'_, Multi>;
- /// The monoisotopic mass of this amino acid. Should be redefined for better performance.
- fn monoisotopic_mass(&self) -> Cow<'_, Multi> {
- Cow::Owned(
- self.formulas()
- .iter()
- .map(MolecularFormula::monoisotopic_mass)
- .collect(),
- )
- }
- /// The average weight of this amino acid. Should be redefined for better performance.
- fn average_weight(&self) -> Cow<'_, Multi> {
- Cow::Owned(
- self.formulas()
- .iter()
- .map(MolecularFormula::average_weight)
- .collect(),
- )
+impl std::fmt::Display for dyn IsAminoAcid {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{}", self.pro_forma_definition())
}
- /// The mass with a given mass mode for this amino acid. Should be redefined for better performance.
- fn mass(&self, mode: MassMode) -> Cow<'_, Multi> {
- Cow::Owned(self.formulas().iter().map(|f| f.mass(mode)).collect())
+}
+
+include!("../shared/aminoacid.rs");
+
+impl std::fmt::Display for AminoAcid {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{}", self.pro_forma_definition())
}
- /// The molecular formula of the side chain of the amino acid.
- fn side_chain(&self) -> Cow<'_, Multi>;
- /// The molecular formulas that can fragment for satellite ions (d and w). Commonly the fragment
- /// after the second carbon into the side chain. `MolecularFormula::default()` can be returned
- /// if no satellite ions are possible.
- fn satellite_ion_fragments(&self) -> Option>>;
- /// Common neutral losses for the immonium ion of this amino acid.
- fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]>;
}
-include!("shared/aminoacid.rs");
+impl IsAminoAcid for AminoAcid {
+ /// The one letter code of the amino acid, every variant has one so this is always `Some`
+ fn one_letter_code(&self) -> Option {
+ Some(match self {
+ Self::Alanine => 'A',
+ Self::AmbiguousAsparagine => 'B', // Asparagine or aspartic acid
+ Self::Cysteine => 'C',
+ Self::AsparticAcid => 'D',
+ Self::GlutamicAcid => 'E',
+ Self::Phenylalanine => 'F',
+ Self::Glycine => 'G',
+ Self::Histidine => 'H',
+ Self::Isoleucine => 'I',
+ Self::AmbiguousLeucine => 'J', // Leucine or isoleucine
+ Self::Lysine => 'K',
+ Self::Leucine => 'L',
+ Self::Methionine => 'M',
+ Self::Asparagine => 'N',
+ Self::Pyrrolysine => 'O',
+ Self::Proline => 'P',
+ Self::Glutamine => 'Q',
+ Self::Arginine => 'R',
+ Self::Serine => 'S',
+ Self::Threonine => 'T',
+ Self::Selenocysteine => 'U',
+ Self::Valine => 'V',
+ Self::Tryptophan => 'W',
+ Self::Unknown => 'X',
+ Self::Tyrosine => 'Y',
+ Self::AmbiguousGlutamine => 'Z', // Glutamine or glutamic acid
+ })
+ }
-impl AminoAcid {
- /// All amino acids with a unique mass (no I/L in favour of J, no B, no Z, and no X)
- pub const UNIQUE_MASS_AMINO_ACIDS: &'static [Self] = &[
- Self::Glycine,
- Self::Alanine,
- Self::Arginine,
- Self::Asparagine,
- Self::AsparticAcid,
- Self::Cysteine,
- Self::Glutamine,
- Self::GlutamicAcid,
- Self::Histidine,
- Self::AmbiguousLeucine,
- Self::Lysine,
- Self::Methionine,
- Self::Phenylalanine,
- Self::Proline,
- Self::Serine,
- Self::Threonine,
- Self::Tryptophan,
- Self::Tyrosine,
- Self::Valine,
- Self::Selenocysteine,
- Self::Pyrrolysine,
- ];
+ fn pro_forma_definition(&self) -> Cow<'_, str> {
+ Cow::Borrowed(match self { // Always a borrowed static string holding the same letter as `one_letter_code`
+ Self::Alanine => "A",
+ Self::AmbiguousAsparagine => "B",
+ Self::Cysteine => "C",
+ Self::AsparticAcid => "D",
+ Self::GlutamicAcid => "E",
+ Self::Phenylalanine => "F",
+ Self::Glycine => "G",
+ Self::Histidine => "H",
+ Self::Isoleucine => "I",
+ Self::AmbiguousLeucine => "J",
+ Self::Lysine => "K",
+ Self::Leucine => "L",
+ Self::Methionine => "M",
+ Self::Asparagine => "N",
+ Self::Pyrrolysine => "O",
+ Self::Proline => "P",
+ Self::Glutamine => "Q",
+ Self::Arginine => "R",
+ Self::Serine => "S",
+ Self::Threonine => "T",
+ Self::Selenocysteine => "U",
+ Self::Valine => "V",
+ Self::Tryptophan => "W",
+ Self::Unknown => "X",
+ Self::Tyrosine => "Y",
+ Self::AmbiguousGlutamine => "Z",
+ })
+ }
- /// All 20 canonical amino acids
- pub const CANONICAL_AMINO_ACIDS: &'static [Self] = &[
- Self::Glycine,
- Self::Alanine,
- Self::Arginine,
- Self::Asparagine,
- Self::AsparticAcid,
- Self::Cysteine,
- Self::Glutamine,
- Self::GlutamicAcid,
- Self::Histidine,
- Self::Leucine,
- Self::Isoleucine,
- Self::Lysine,
- Self::Methionine,
- Self::Phenylalanine,
- Self::Proline,
- Self::Serine,
- Self::Threonine,
- Self::Tryptophan,
- Self::Tyrosine,
- Self::Valine,
- ];
+ /// The three letter code of the amino acid, every variant has one so this is always `Some`
+ fn three_letter_code(&self) -> Option> {
+ Some(Cow::Borrowed(match self {
+ Self::Alanine => "Ala",
+ Self::AmbiguousAsparagine => "Asx", // Asparagine or aspartic acid
+ Self::Cysteine => "Cys",
+ Self::AsparticAcid => "Asp",
+ Self::GlutamicAcid => "Glu",
+ Self::Phenylalanine => "Phe",
+ Self::Glycine => "Gly",
+ Self::Histidine => "His",
+ Self::Isoleucine => "Ile",
+ Self::AmbiguousLeucine => "Xle", // Leucine or isoleucine
+ Self::Lysine => "Lys",
+ Self::Leucine => "Leu",
+ Self::Methionine => "Met",
+ Self::Asparagine => "Asn",
+ Self::Pyrrolysine => "Pyl",
+ Self::Proline => "Pro",
+ Self::Glutamine => "Gln",
+ Self::Arginine => "Arg",
+ Self::Serine => "Ser",
+ Self::Threonine => "Thr",
+ Self::Selenocysteine => "Sec",
+ Self::Valine => "Val",
+ Self::Tryptophan => "Trp",
+ Self::Unknown => "Xaa",
+ Self::Tyrosine => "Tyr",
+ Self::AmbiguousGlutamine => "Glx", // Glutamine or glutamic acid
+ }))
+ }
+
+ /// The full name of the amino acid, written exactly as the variant name (so without spaces)
+ fn name(&self) -> Cow<'_, str> {
+ Cow::Borrowed(match self {
+ Self::Alanine => "Alanine",
+ Self::AmbiguousAsparagine => "AmbiguousAsparagine",
+ Self::Cysteine => "Cysteine",
+ Self::AsparticAcid => "AsparticAcid",
+ Self::GlutamicAcid => "GlutamicAcid",
+ Self::Phenylalanine => "Phenylalanine",
+ Self::Glycine => "Glycine",
+ Self::Histidine => "Histidine",
+ Self::Isoleucine => "Isoleucine",
+ Self::AmbiguousLeucine => "AmbiguousLeucine",
+ Self::Lysine => "Lysine",
+ Self::Leucine => "Leucine",
+ Self::Methionine => "Methionine",
+ Self::Asparagine => "Asparagine",
+ Self::Pyrrolysine => "Pyrrolysine",
+ Self::Proline => "Proline",
+ Self::Glutamine => "Glutamine",
+ Self::Arginine => "Arginine",
+ Self::Serine => "Serine",
+ Self::Threonine => "Threonine",
+ Self::Selenocysteine => "Selenocysteine",
+ Self::Valine => "Valine",
+ Self::Tryptophan => "Tryptophan",
+ Self::Unknown => "Unknown",
+ Self::Tyrosine => "Tyrosine",
+ Self::AmbiguousGlutamine => "AmbiguousGlutamine",
+ })
+ }
+
+ fn side_chain(
+ &self,
+ sequence_index: SequencePosition,
+ peptidoform_index: usize,
+ ) -> Cow<'_, Multi> {
+ let crate::SequencePosition::Index(sequence_index) = sequence_index else {
+ return Cow::Owned(Multi::default()); // Any non `Index` position gives an empty result, for every amino acid
+ };
+ Cow::Owned(match self {
+ Self::Alanine => molecular_formula!(H 3 C 1).into(),
+ Self::Arginine => molecular_formula!(H 10 C 4 N 3).into(), // One of the H's counts as the charge carrier and is added later
+ Self::Asparagine => molecular_formula!(H 4 C 2 O 1 N 1).into(),
+ Self::AsparticAcid => molecular_formula!(H 3 C 2 O 2).into(),
+ Self::AmbiguousAsparagine => vec![ // Both options are returned, each labelled with the option and position it stands for
+ molecular_formula!(H 4 C 2 O 1 N 1 (crate::AmbiguousLabel::AminoAcid{option: Self::Asparagine, sequence_index, peptidoform_index})),
+ molecular_formula!(H 3 C 2 O 2 (crate::AmbiguousLabel::AminoAcid{option: Self::AsparticAcid, sequence_index, peptidoform_index})),
+ ]
+ .into(),
+ Self::Cysteine => molecular_formula!(H 3 C 1 S 1).into(),
+ Self::Glutamine => molecular_formula!(H 6 C 3 O 1 N 1).into(),
+ Self::GlutamicAcid => molecular_formula!(H 5 C 3 O 2).into(),
+ Self::AmbiguousGlutamine => vec![ // Both options are returned, each labelled with the option and position it stands for
+ molecular_formula!(H 6 C 3 O 1 N 1 (crate::AmbiguousLabel::AminoAcid{option: Self::Glutamine, sequence_index, peptidoform_index})),
+ molecular_formula!(H 5 C 3 O 2 (crate::AmbiguousLabel::AminoAcid{option: Self::GlutamicAcid, sequence_index, peptidoform_index})),
+ ]
+ .into(),
+ Self::Glycine => molecular_formula!(H 1).into(),
+ Self::Histidine => molecular_formula!(H 5 C 4 N 2).into(),
+ Self::AmbiguousLeucine | Self::Isoleucine | Self::Leucine => { // Same formula for all three, so no ambiguity label is attached
+ molecular_formula!(H 9 C 4).into()
+ }
+ Self::Lysine => molecular_formula!(H 10 C 4 N 1).into(),
+ Self::Methionine => molecular_formula!(H 7 C 3 S 1).into(),
+ Self::Phenylalanine => molecular_formula!(H 7 C 7).into(),
+ Self::Proline => molecular_formula!(H 5 C 3).into(),
+ Self::Pyrrolysine => molecular_formula!(H 17 C 9 O 1 N 2).into(), // NOTE(review): free pyrrolysine is C12H21N3O3, which would give a C10H17N2O side chain on a C2H2NO backbone; confirm the `C 9`
+ Self::Selenocysteine => molecular_formula!(H 3 C 1 Se 1).into(),
+ Self::Serine => molecular_formula!(H 3 C 1 O 1).into(),
+ Self::Threonine => molecular_formula!(H 5 C 2 O 1).into(),
+ Self::Tryptophan => molecular_formula!(H 8 C 9 N 1).into(),
+ Self::Tyrosine => molecular_formula!(H 7 C 7 O 1).into(),
+ Self::Valine => molecular_formula!(H 7 C 3).into(),
+ Self::Unknown => molecular_formula!().into(),
+ })
+ }
// TODO: Take side chain mutations into account (maybe define pyrrolysine as a mutation)
- /// # Panics
- /// When the sequence index is terminal.
- pub(crate) fn satellite_ion_fragments(
- self,
+ fn satellite_ion_fragments(
+ &self,
sequence_index: SequencePosition,
peptidoform_index: usize,
- ) -> Multi {
+ ) -> Option>> {
let crate::SequencePosition::Index(sequence_index) = sequence_index else {
- panic!("Not allowed to call satellite ion fragments with a terminal sequence index")
+ return None;
};
+ Some(Cow::Owned(
match self {
Self::Alanine
| Self::Glycine
@@ -172,7 +260,7 @@ impl AminoAcid {
]
.into(),
Self::Valine => molecular_formula!(H 3 C 1).into(), // Technically two options, but both have the same mass
- }
+ }))
}
/// All losses from the base immonium ions. Compiled from the sources below.
@@ -251,9 +339,9 @@ impl AminoAcid {
/// | | 55 | | | 55 | | 55 | | | | | 55.0548 | | | | | | 4 | 55.0548 | | 17.0263 | | H3N1 | | H3N1 |
/// | | 44 | | | | | | | | | | | | | | | | 1 | 44 | | 28.0811 | | C1H2N1 | | C1H2N1 |
/// | | | | | 41 | | 41 | | | | | 41.0391 | | | | | | 3 | 41.0391 | | 31.0420 | | C1H5N1 | | C1H5N1 |
- fn immonium_losses(self) -> Vec {
+ fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]> {
// TODO: For B/Z there are common immonium ions, but the mass is the same (meaning the loss is different), find a way of representing that
- match self {
+ Cow::Owned(match self {
Self::Arginine => vec![
NeutralLoss::Gain(molecular_formula!(C 2 O 2)),
NeutralLoss::Loss(molecular_formula!(C 1 H 2)),
@@ -321,8 +409,59 @@ impl AminoAcid {
NeutralLoss::Loss(molecular_formula!(C 1 H 5 N 1)),
],
_ => Vec::new(),
- }
+ })
}
+}
+
+impl AminoAcid {
+ /// All amino acids with a unique mass (no I/L in favour of J, no B, no Z, and no X)
+ pub const UNIQUE_MASS_AMINO_ACIDS: &'static [Self] = &[
+ Self::Glycine,
+ Self::Alanine,
+ Self::Arginine,
+ Self::Asparagine,
+ Self::AsparticAcid,
+ Self::Cysteine,
+ Self::Glutamine,
+ Self::GlutamicAcid,
+ Self::Histidine,
+ Self::AmbiguousLeucine,
+ Self::Lysine,
+ Self::Methionine,
+ Self::Phenylalanine,
+ Self::Proline,
+ Self::Serine,
+ Self::Threonine,
+ Self::Tryptophan,
+ Self::Tyrosine,
+ Self::Valine,
+ Self::Selenocysteine,
+ Self::Pyrrolysine,
+ ];
+
+ /// All 20 canonical amino acids
+ pub const CANONICAL_AMINO_ACIDS: &'static [Self] = &[
+ Self::Glycine,
+ Self::Alanine,
+ Self::Arginine,
+ Self::Asparagine,
+ Self::AsparticAcid,
+ Self::Cysteine,
+ Self::Glutamine,
+ Self::GlutamicAcid,
+ Self::Histidine,
+ Self::Leucine,
+ Self::Isoleucine,
+ Self::Lysine,
+ Self::Methionine,
+ Self::Phenylalanine,
+ Self::Proline,
+ Self::Serine,
+ Self::Threonine,
+ Self::Tryptophan,
+ Self::Tyrosine,
+ Self::Valine,
+ ];
// TODO: generalise over used storage type, so using molecularformula, monoisotopic mass, or average mass, also make sure that AAs can return these numbers in a const fashion
#[expect(clippy::too_many_lines, clippy::too_many_arguments)]
@@ -383,19 +522,23 @@ impl AminoAcid {
));
}
if ions.d.0 && allow_terminal.0 {
- base_fragments.extend(Fragment::generate_all(
- &(-self.satellite_ion_fragments(sequence_index, peptidoform_index)
- * modifications
- * self.formulas_inner(sequence_index, peptidoform_index)
- + molecular_formula!(H 1 C 1 O 1)),
- peptidoform_ion_index,
- peptidoform_index,
- &FragmentType::d(n_pos),
- n_term,
- ions.d.1,
- charge_carriers,
- ions.d.2,
- ));
+ if let Some(satellite_ion_fragments) =
+ self.satellite_ion_fragments(sequence_index, peptidoform_index)
+ {
+ base_fragments.extend(Fragment::generate_all(
+ &(-satellite_ion_fragments.as_ref()
+ * modifications
+ * self.formulas_inner(sequence_index, peptidoform_index)
+ + molecular_formula!(H 1 C 1 O 1)),
+ peptidoform_ion_index,
+ peptidoform_index,
+ &FragmentType::d(n_pos),
+ n_term,
+ ions.d.1,
+ charge_carriers,
+ ions.d.2,
+ ));
+ }
}
if ions.v.0 && allow_terminal.1 {
base_fragments.extend(Fragment::generate_all(
@@ -410,19 +553,23 @@ impl AminoAcid {
));
}
if ions.w.0 && allow_terminal.1 {
- base_fragments.extend(Fragment::generate_all(
- &(-self.satellite_ion_fragments(sequence_index, peptidoform_index)
- * modifications
- * self.formulas_inner(sequence_index, peptidoform_index)
- + molecular_formula!(H 2 N 1)),
- peptidoform_ion_index,
- peptidoform_index,
- &FragmentType::w(c_pos),
- c_term,
- ions.w.1,
- charge_carriers,
- ions.w.2,
- ));
+ if let Some(satellite_ion_fragments) =
+ self.satellite_ion_fragments(sequence_index, peptidoform_index)
+ {
+ base_fragments.extend(Fragment::generate_all(
+ &(-satellite_ion_fragments.as_ref()
+ * modifications
+ * self.formulas_inner(sequence_index, peptidoform_index)
+ + molecular_formula!(H 2 N 1)),
+ peptidoform_ion_index,
+ peptidoform_index,
+ &FragmentType::w(c_pos),
+ c_term,
+ ions.w.1,
+ charge_carriers,
+ ions.w.2,
+ ));
+ }
}
if ions.x.0 && allow_terminal.1 {
base_fragments.extend(Fragment::generate_all(
@@ -483,7 +630,7 @@ impl AminoAcid {
peptidoform_index,
&FragmentType::Immonium(n_pos, self.into()), // TODO: get the actual sequenceelement here
&Multi::default(),
- self.immonium_losses().as_slice(),
+ self.immonium_losses().as_ref(),
charge_carriers,
ions.immonium.1,
));
@@ -491,102 +638,6 @@ impl AminoAcid {
base_fragments
}
- /// Get the single letter representation of the amino acid
- pub const fn char(self) -> char {
- match self {
- Self::Alanine => 'A',
- Self::AmbiguousAsparagine => 'B',
- Self::Cysteine => 'C',
- Self::AsparticAcid => 'D',
- Self::GlutamicAcid => 'E',
- Self::Phenylalanine => 'F',
- Self::Glycine => 'G',
- Self::Histidine => 'H',
- Self::Isoleucine => 'I',
- Self::AmbiguousLeucine => 'J',
- Self::Lysine => 'K',
- Self::Leucine => 'L',
- Self::Methionine => 'M',
- Self::Asparagine => 'N',
- Self::Pyrrolysine => 'O',
- Self::Proline => 'P',
- Self::Glutamine => 'Q',
- Self::Arginine => 'R',
- Self::Serine => 'S',
- Self::Threonine => 'T',
- Self::Selenocysteine => 'U',
- Self::Valine => 'V',
- Self::Tryptophan => 'W',
- Self::Unknown => 'X',
- Self::Tyrosine => 'Y',
- Self::AmbiguousGlutamine => 'Z',
- }
- }
-
- /// Get the 3 letter code for the amino acid
- pub const fn code(self) -> &'static str {
- match self {
- Self::Alanine => "Ala",
- Self::AmbiguousAsparagine => "Asx",
- Self::Cysteine => "Cys",
- Self::AsparticAcid => "Asp",
- Self::GlutamicAcid => "Glu",
- Self::Phenylalanine => "Phe",
- Self::Glycine => "Gly",
- Self::Histidine => "His",
- Self::Isoleucine => "Ile",
- Self::AmbiguousLeucine => "Xle",
- Self::Lysine => "Lys",
- Self::Leucine => "Leu",
- Self::Methionine => "Met",
- Self::Asparagine => "Asn",
- Self::Pyrrolysine => "Pyl",
- Self::Proline => "Pro",
- Self::Glutamine => "Gln",
- Self::Arginine => "Arg",
- Self::Serine => "Ser",
- Self::Threonine => "Thr",
- Self::Selenocysteine => "Sec",
- Self::Valine => "Val",
- Self::Tryptophan => "Trp",
- Self::Unknown => "Xaa",
- Self::Tyrosine => "Tyr",
- Self::AmbiguousGlutamine => "Glx",
- }
- }
-
- /// Get the full name for the amino acid
- pub const fn name(self) -> &'static str {
- match self {
- Self::Alanine => "Alanine",
- Self::AmbiguousAsparagine => "AmbiguousAsparagine",
- Self::Cysteine => "Cysteine",
- Self::AsparticAcid => "AsparticAcid",
- Self::GlutamicAcid => "GlutamicAcid",
- Self::Phenylalanine => "Phenylalanine",
- Self::Glycine => "Glycine",
- Self::Histidine => "Histidine",
- Self::Isoleucine => "Isoleucine",
- Self::AmbiguousLeucine => "AmbiguousLeucine",
- Self::Lysine => "Lysine",
- Self::Leucine => "Leucine",
- Self::Methionine => "Methionine",
- Self::Asparagine => "Asparagine",
- Self::Pyrrolysine => "Pyrrolysine",
- Self::Proline => "Proline",
- Self::Glutamine => "Glutamine",
- Self::Arginine => "Arginine",
- Self::Serine => "Serine",
- Self::Threonine => "Threonine",
- Self::Selenocysteine => "Selenocysteine",
- Self::Valine => "Valine",
- Self::Tryptophan => "Tryptophan",
- Self::Unknown => "Unknown",
- Self::Tyrosine => "Tyrosine",
- Self::AmbiguousGlutamine => "AmbiguousGlutamine",
- }
- }
-
/// Check if two amino acids are considered identical. X is identical to anything, J to IL, B to ND, Z to EQ.
pub(crate) fn canonical_identical(self, rhs: Self) -> bool {
match (self, rhs) {
@@ -604,12 +655,6 @@ impl AminoAcid {
}
}
-impl std::fmt::Display for AminoAcid {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- write!(f, "{}", self.char())
- }
-}
-
#[cfg(test)]
#[expect(clippy::unreadable_literal, clippy::missing_panics_doc)]
mod tests {
@@ -666,7 +711,7 @@ mod tests {
);
println!(
"{}: {} {} {} {}",
- aa.char(),
+ aa.pro_forma_definition(),
mono,
mono_mass,
weight,
diff --git a/rustyms/src/aminoacid/is_amino_acid.rs b/rustyms/src/aminoacid/is_amino_acid.rs
new file mode 100644
index 00000000..882660ab
--- /dev/null
+++ b/rustyms/src/aminoacid/is_amino_acid.rs
@@ -0,0 +1,65 @@
+//! Module used to create the [IsAminoAcid] trait
+
+use crate::{
+ formula::MolecularFormula, system::Mass, MassMode, Multi, MultiChemical, NeutralLoss,
+ SequencePosition,
+};
+
+use std::borrow::Cow;
+
+/// A general trait to define amino acids, the default mass methods build on the formulas of the amino acid.
+pub trait IsAminoAcid: MultiChemical {
+ /// The full name for this amino acid.
+ fn name(&self) -> Cow<'_, str>;
+ /// The three letter code for this amino acid, or `None` if there is no common three letter
+ /// definition for this amino acid.
+ fn three_letter_code(&self) -> Option>;
+ /// The one letter code for this amino acid, or `None` if there is no common single character
+ /// definition for this amino acid.
+ #[doc(alias = "code")]
+ fn one_letter_code(&self) -> Option;
+ /// The ProForma definition for this amino acid. If this is not a simple amino acid it can be
+ /// defined as an amino acid with an additional modification. For example `X[H9C2N2]` could be
+ /// used if Arginine was not defined as `R` in ProForma.
+ fn pro_forma_definition(&self) -> Cow<'_, str>;
+ /// The monoisotopic mass of this amino acid. The default recalculates it from `formulas()` on every call, redefine it for better performance.
+ fn monoisotopic_mass(&self) -> Cow<'_, Multi> {
+ Cow::Owned(
+ self.formulas()
+ .iter()
+ .map(MolecularFormula::monoisotopic_mass)
+ .collect(),
+ )
+ }
+ /// The average weight of this amino acid. The default recalculates it from `formulas()` on every call, redefine it for better performance.
+ fn average_weight(&self) -> Cow<'_, Multi> {
+ Cow::Owned(
+ self.formulas()
+ .iter()
+ .map(MolecularFormula::average_weight)
+ .collect(),
+ )
+ }
+ /// The mass with a given mass mode for this amino acid. The default recalculates it from `formulas()` on every call, redefine it for better performance.
+ fn mass(&self, mode: MassMode) -> Cow<'_, Multi> {
+ Cow::Owned(self.formulas().iter().map(|f| f.mass(mode)).collect())
+ }
+ /// The molecular formula of the side chain of the amino acid. The `sequence_index` and
+ /// `peptidoform_index` are used to keep track of ambiguous amino acids.
+ fn side_chain(
+ &self,
+ sequence_index: SequencePosition,
+ peptidoform_index: usize,
+ ) -> Cow<'_, Multi>;
+ /// The molecular formulas that can fragment for satellite ions (d and w). Commonly the fragment
+ /// after the second carbon into the side chain. `MolecularFormula::default()` can be returned
+ /// if no satellite ions are possible, `None` if no answer can be given (for example a terminal
+ /// `sequence_index`). Both indices are used to keep track of ambiguous amino acids.
+ fn satellite_ion_fragments(
+ &self,
+ sequence_index: SequencePosition,
+ peptidoform_index: usize,
+ ) -> Option>>;
+ /// Common neutral losses for the immonium ion of this amino acid.
+ fn immonium_losses(&self) -> Cow<'_, [NeutralLoss]>;
+}
diff --git a/rustyms/src/aminoacid/mod.rs b/rustyms/src/aminoacid/mod.rs
new file mode 100644
index 00000000..63b0fada
--- /dev/null
+++ b/rustyms/src/aminoacid/mod.rs
@@ -0,0 +1,7 @@
+mod aminoacid;
+mod is_amino_acid;
+pub mod pka;
+pub mod properties;
+
+pub use aminoacid::*;
+pub use is_amino_acid::*;
diff --git a/rustyms/src/aminoacid/pka.rs b/rustyms/src/aminoacid/pka.rs
new file mode 100644
index 00000000..fda9090f
--- /dev/null
+++ b/rustyms/src/aminoacid/pka.rs
@@ -0,0 +1,367 @@
+//! Module used to store and calculate pKa and isoelectric point values for a given [AminoAcid] or [Peptidoform] respectively
+
+use serde::{Deserialize, Serialize};
+
+use crate::{
+ aminoacid::properties::ChargeClass, modification::SimpleModificationInner, AminoAcid, AtMax,
+ Peptidoform, SemiAmbiguous,
+};
+
+use super::is_amino_acid::IsAminoAcid;
+
+/// A source for pKa values, which can be used to calculate the pKa for peptidoforms.
+pub trait PKaSource {
+ /// Get the pKa values for the given amino acid and modifications.
+ #[allow(non_snake_case)]
+ fn pKa(
+ amino_acid: AA,
+ side_chain_modifications: impl Iterator- >,
+ n_terminal_modifications: Option>>,
+ c_terminal_modifications: Option>>,
+ ) -> Option;
+}
+
+impl> Peptidoform {
+    /// Get the calculated isoelectric point (pI) for the peptidoform, or None if any sequence elements lack pKa values.
+    ///
+    /// The isoelectric point is the pH at which the net charge of the peptidoform is zero. This is determined using a binary
+    /// search between pH 0 and 14. The charge at each pH is computed using the Henderson-Hasselbalch equation with pKa values
+    /// from the provided `PKaSource`, considering N-terminal, C-terminal, and sidechain ionizable groups.
+    ///
+    /// # Example
+    /// ```rust
+    /// # use rustyms::{Peptidoform, aminoacid::pka::{PKaSource, PKaLide1991}};
+    /// // Create a SemiAmbiguous Peptidoform for the peptide EMEVEESPEK
+    /// let peptidoform = Peptidoform::pro_forma(&"EMEVEESPEK", None).unwrap().into_semi_ambiguous().unwrap();
+    /// let pi = peptidoform.isoelectic_point::();
+    /// // The calculated pI is approximately 3.57 based on Lide 1991 pKa values
+    /// assert_eq!(pi.map(|v| (v * 100.0).round() / 100.0), Some(3.57));
+    /// ```
+    ///
+    /// # Shortcomings
+    /// - **Naive Approach**: Does not account for interactions between ionizable groups.
+    /// - **Modifications**: Handed to the `PKaSource`, which decides whether it still has a value for the modified residue.
+    /// - **Environmental Factors**: Assumes pKa values are independent of sequence and environment.
+    ///
+    /// Also returns None for an empty sequence and when a residue with a sidechain pKa has an unknown charge class.
+    #[allow(non_snake_case)]
+    pub fn isoelectic_point>(&self) -> Option {
+        let sequence = self.sequence();
+        if sequence.is_empty() {
+            return None;
+        }
+
+        // Collect all ionizable groups with their pKa values (both termini plus at most one sidechain per residue)
+        let mut ionizable = Vec::with_capacity(sequence.len() + 2);
+
+        // Handle N-terminal, the C-terminal modifications only apply if this is also the last residue
+        let first = sequence.first()?;
+        ionizable.push((
+            ChargeClass::Positive,
+            Source::pKa(
+                first.aminoacid.aminoacid(),
+                first.modifications.iter().filter_map(|m| m.simple()),
+                Some(self.get_n_term().iter().filter_map(|m| m.simple())),
+                (self.len() == 1).then_some(self.get_c_term().iter().filter_map(|m| m.simple())),
+            )?
+            .n_term(),
+        )); // N-terminal is always positive
+
+        // Handle C-terminal, the N-terminal modifications only apply if this is also the first residue
+        let last = sequence.last()?;
+        ionizable.push((
+            ChargeClass::Negative,
+            Source::pKa(
+                last.aminoacid.aminoacid(),
+                last.modifications.iter().filter_map(|m| m.simple()),
+                (self.len() == 1).then_some(self.get_n_term().iter().filter_map(|m| m.simple())),
+                Some(self.get_c_term().iter().filter_map(|m| m.simple())),
+            )?
+            .c_term(),
+        )); // C-terminal is always negative
+
+        // Handle sidechains, a missing pKa entry for any residue aborts the whole calculation
+        for (index, aa) in sequence.iter().enumerate() {
+            if let Some(sidechain) = Source::pKa(
+                aa.aminoacid.aminoacid(),
+                aa.modifications.iter().filter_map(|m| m.simple()),
+                (index == 0).then_some(self.get_n_term().iter().filter_map(|m| m.simple())),
+                (index == self.len() - 1)
+                    .then_some(self.get_c_term().iter().filter_map(|m| m.simple())), // The last residue carries the C-terminal modifications
+            )?
+            .sidechain()
+            {
+                let charge_class = aa.aminoacid.aminoacid().charge_class();
+                match charge_class {
+                    ChargeClass::Positive | ChargeClass::Negative => {
+                        ionizable.push((charge_class, sidechain));
+                    }
+                    ChargeClass::Unknown => return None,
+                    ChargeClass::Uncharged => (),
+                }
+            }
+        }
+
+        // Binary search between pH 0-14 to find isoelectric point, stopping once the window is narrower than EPSILON
+        const EPSILON: f64 = 0.0001;
+        let mut low = 0.0;
+        let mut high = 14.0;
+        let mut new_pi = 7.775;
+
+        while (high - low) > EPSILON {
+            new_pi = (low + high) / 2.0;
+            let charge = calculate_charge(new_pi, &ionizable);
+
+            if charge > 0.0 { // Still net positive, so the isoelectric point lies at a higher pH
+                low = new_pi;
+            } else {
+                high = new_pi;
+            }
+        }
+
+        Some(new_pi)
+    }
+}
+
+/// Net charge at the given pH: every group adds (positive) or subtracts (negative) the fraction that is charged.
+fn calculate_charge(pH: f64, ionizable: &[(ChargeClass, f64)]) -> f64 {
+    ionizable.iter().fold(0.0, |net, (class, pka)| {
+        // Charged fraction of a single group for the given pH/pKa difference
+        let charged_fraction = |exponent: f64| 1.0 / (10.0_f64.powf(exponent) + 1.0);
+        match class {
+            ChargeClass::Positive => net + charged_fraction(pH - pka),
+            ChargeClass::Negative => net - charged_fraction(pka - pH),
+            // Any other class does not contribute to the net charge
+            _ => net,
+        }
+    })
+}
+/// The pKa values for a specific amino acid: one for each terminus and an optional one for the side chain
+#[derive(Copy, Clone, PartialEq, PartialOrd, Debug, Serialize, Deserialize)]
+pub struct AminoAcidPKa {
+ n_term: f64, // pKa used when this amino acid is the N-terminus
+ sidechain: Option, // pKa of the side chain, None when the source lists none
+ c_term: f64, // pKa used when this amino acid is the C-terminus
+}
+
+impl AminoAcidPKa {
+ const fn new(n_term: f64, sidechain: Option, c_term: f64) -> Self { // Private constructor, argument order: N-terminus, side chain, C-terminus
+ Self {
+ n_term,
+ sidechain,
+ c_term,
+ }
+ }
+
+ /// Get the pKa value for the N-terminus of the amino acid
+ pub const fn n_term(self) -> f64 {
+ self.n_term
+ }
+
+ /// Get the pKa value for the side chain group of the amino acid, if one is defined
+ pub const fn sidechain(self) -> Option {
+ self.sidechain
+ }
+
+ /// Get the pKa value for the C-terminus of the amino acid
+ pub const fn c_term(self) -> f64 {
+ self.c_term
+ }
+}
+
+/// pKa values from Lide, D. R. (1991). Handbook of Chemistry and Physics: A Ready Reference Book of Chemical and Physical Data.
+pub struct PKaLide1991;
+
+impl PKaSource for PKaLide1991 {
+ fn pKa(
+ amino_acid: AminoAcid,
+ mut side_chain_modifications: impl Iterator
- >,
+ n_terminal_modifications: Option>>,
+ c_terminal_modifications: Option>>,
+ ) -> Option {
+ if side_chain_modifications.next().is_some()
+ || n_terminal_modifications.is_some_and(|mut m| m.next().is_some())
+ || c_terminal_modifications.is_some_and(|mut m| m.next().is_some())
+ {
+ return None; // This table only has values for amino acids without any modification
+ }
+ match amino_acid { // Arguments of `AminoAcidPKa::new` are: N-terminus, side chain, C-terminus
+ AminoAcid::Arginine => Some(AminoAcidPKa::new(9.00, Some(12.10), 2.03)),
+ AminoAcid::Histidine => Some(AminoAcidPKa::new(9.09, Some(6.04), 1.70)),
+ AminoAcid::Lysine => Some(AminoAcidPKa::new(9.16, Some(10.67), 2.15)),
+ AminoAcid::AsparticAcid => Some(AminoAcidPKa::new(9.66, Some(3.71), 1.95)),
+ AminoAcid::GlutamicAcid => Some(AminoAcidPKa::new(9.58, Some(4.15), 2.16)),
+ AminoAcid::Tyrosine => Some(AminoAcidPKa::new(9.04, Some(10.10), 2.24)),
+ AminoAcid::Cysteine => Some(AminoAcidPKa::new(10.28, Some(8.14), 1.91)),
+ AminoAcid::Alanine => Some(AminoAcidPKa::new(9.71, None, 2.33)),
+ AminoAcid::Glycine => Some(AminoAcidPKa::new(9.58, None, 2.34)),
+ AminoAcid::Proline => Some(AminoAcidPKa::new(10.47, None, 1.95)),
+ AminoAcid::Serine => Some(AminoAcidPKa::new(9.05, None, 2.13)),
+ AminoAcid::Threonine => Some(AminoAcidPKa::new(8.96, None, 2.20)),
+ AminoAcid::Methionine => Some(AminoAcidPKa::new(9.08, None, 2.16)),
+ AminoAcid::Phenylalanine => Some(AminoAcidPKa::new(9.09, None, 2.18)),
+ AminoAcid::Tryptophan => Some(AminoAcidPKa::new(9.34, None, 2.38)),
+ AminoAcid::Valine => Some(AminoAcidPKa::new(9.52, None, 2.27)),
+ AminoAcid::Isoleucine => Some(AminoAcidPKa::new(9.60, None, 2.26)),
+ AminoAcid::Leucine => Some(AminoAcidPKa::new(9.58, None, 2.32)),
+ AminoAcid::Glutamine => Some(AminoAcidPKa::new(9.00, None, 2.18)),
+ AminoAcid::Asparagine => Some(AminoAcidPKa::new(8.73, None, 2.16)),
+ _ => None, // Every other amino acid has no entry in this table
+ }
+ }
+}
+
+/// pKa values from Lehninger, A. L., Nelson, D. L., & Cox, M. M. (2005). Lehninger Principles of Biochemistry. Macmillan.
+///
+/// The table below has entries for twenty amino acids. Any other amino acid, and any
+/// residue for which a side chain or terminal modification is supplied, yields `None`.
+pub struct PKaLehninger;
+
+impl PKaSource for PKaLehninger {
+ // NOTE(review): the generic arguments of this signature look stripped in this copy of the
+ // patch (`impl Iterator` without an item type, `Option>>`, the stray `- >,` line, a bare
+ // `Option` return type) - restore them from the upstream file before applying.
+ fn pKa(
+ amino_acid: AminoAcid,
+ mut side_chain_modifications: impl Iterator
- >,
+ n_terminal_modifications: Option>>,
+ c_terminal_modifications: Option>>,
+ ) -> Option {
+ // Values are only tabulated for unmodified residues: as soon as any of the three
+ // modification iterators yields an item no pKa is reported.
+ if side_chain_modifications.next().is_some()
+ || n_terminal_modifications.is_some_and(|mut m| m.next().is_some())
+ || c_terminal_modifications.is_some_and(|mut m| m.next().is_some())
+ {
+ return None;
+ }
+ // Arguments to `AminoAcidPKa::new` are presumably (N-terminal, side chain, C-terminal),
+ // which is the order the unit tests compare against `n_term()`, `sidechain()` and
+ // `c_term()` - confirm against the constructor. `None` marks no side chain pKa.
+ match amino_acid {
+ AminoAcid::Arginine => Some(AminoAcidPKa::new(9.04, Some(12.48), 2.17)),
+ AminoAcid::Histidine => Some(AminoAcidPKa::new(9.17, Some(6.00), 1.82)),
+ AminoAcid::Lysine => Some(AminoAcidPKa::new(8.95, Some(10.53), 2.18)),
+ AminoAcid::AsparticAcid => Some(AminoAcidPKa::new(9.60, Some(3.65), 1.88)),
+ AminoAcid::GlutamicAcid => Some(AminoAcidPKa::new(9.67, Some(4.25), 2.19)),
+ AminoAcid::Tyrosine => Some(AminoAcidPKa::new(9.11, Some(10.07), 2.20)),
+ AminoAcid::Cysteine => Some(AminoAcidPKa::new(10.28, Some(8.18), 1.96)),
+ AminoAcid::Alanine => Some(AminoAcidPKa::new(9.69, None, 2.34)),
+ AminoAcid::Glycine => Some(AminoAcidPKa::new(9.60, None, 2.34)),
+ AminoAcid::Proline => Some(AminoAcidPKa::new(10.96, None, 1.99)),
+ AminoAcid::Serine => Some(AminoAcidPKa::new(9.15, None, 2.21)),
+ AminoAcid::Threonine => Some(AminoAcidPKa::new(9.62, None, 2.11)),
+ AminoAcid::Methionine => Some(AminoAcidPKa::new(9.21, None, 2.28)),
+ AminoAcid::Phenylalanine => Some(AminoAcidPKa::new(9.13, None, 1.83)),
+ AminoAcid::Tryptophan => Some(AminoAcidPKa::new(9.39, None, 2.38)),
+ AminoAcid::Valine => Some(AminoAcidPKa::new(9.62, None, 2.32)),
+ AminoAcid::Isoleucine => Some(AminoAcidPKa::new(9.68, None, 2.36)),
+ AminoAcid::Leucine => Some(AminoAcidPKa::new(9.60, None, 2.36)),
+ AminoAcid::Glutamine => Some(AminoAcidPKa::new(9.13, None, 2.17)),
+ AminoAcid::Asparagine => Some(AminoAcidPKa::new(8.80, None, 2.02)),
+ // Every amino acid without a table entry has no pKa in this source.
+ _ => None,
+ }
+ }
+}
+
+#[cfg(test)]
+#[expect(clippy::float_cmp, clippy::missing_panics_doc)]
+mod tests {
+ use super::*;
+ use crate::{modification::SimpleModification, Peptidoform, SemiAmbiguous};
+
+ // Helper that parses `aas` with `Peptidoform::pro_forma` and converts the result with
+ // `into_semi_ambiguous`. Panics (via `unwrap`) if either step fails, which is the desired
+ // behaviour inside a test.
+ fn create_peptidoform(aas: &str) -> Peptidoform {
+     let parsed = Peptidoform::pro_forma(aas, None).unwrap();
+     parsed.into_semi_ambiguous().unwrap()
+ }
+
+ // Helper function to test the pKa values a source reports for unmodified amino acids.
+ //
+ // Each case pairs an amino acid with either the expected `(n_term, sidechain, c_term)`
+ // values (compared after rounding the reported values to two decimals) or `None` when the
+ // source is expected to have no entry. Both directions are checked against what the source
+ // actually returns: a missing entry for a `Some` case and a present entry for a `None`
+ // case each fail the test.
+ //
+ // NOTE(review): the generic arguments (`PKaSource<AminoAcid>`, `Option<f64>`,
+ // `SimpleModification`) were reconstructed from how the values are used in this module -
+ // confirm against the `PKaSource` trait definition.
+ fn test_pka<Source: PKaSource<AminoAcid>>(
+     test_cases: &[(AminoAcid, Option<(f64, Option<f64>, f64)>)],
+ ) {
+     let round = |v: f64| (v * 100.0).round() / 100.0;
+
+     for (aa, maybe_values) in test_cases {
+         // Query the source once per case, without modifications at any position.
+         let pka = Source::pKa(
+             *aa,
+             std::iter::empty::<SimpleModification>(),
+             None::<std::iter::Empty<SimpleModification>>,
+             None::<std::iter::Empty<SimpleModification>>,
+         );
+
+         let Some((n_term, sidechain, c_term)) = maybe_values else {
+             // The expectation is "no entry", so the source itself has to report none.
+             assert!(pka.is_none(), "Expected None for {aa:?}");
+             continue;
+         };
+
+         let pka = pka.unwrap_or_else(|| panic!("Missing pKa for {aa:?}"));
+
+         assert_eq!(round(pka.n_term()), *n_term, "N-term mismatch for {aa:?}");
+         assert_eq!(
+             pka.sidechain().map(round),
+             *sidechain,
+             "Sidechain mismatch for {aa:?}"
+         );
+         assert_eq!(round(pka.c_term()), *c_term, "C-term mismatch for {aa:?}");
+     }
+ }
+
+ // Helper function to test isoelectric point values given a source.
+ //
+ // Each case is `(sequence, expected)`: the sequence is parsed with `create_peptidoform`,
+ // its isoelectric point is rounded to two decimals and compared with `expected`. An
+ // expected `None` means no isoelectric point should be produced for that peptide.
+ //
+ // NOTE(review): the generic arguments on this signature and on the `isoelectic_point::()`
+ // call look stripped in this copy of the patch - restore them from the upstream file.
+ // NOTE(review): `isoelectic_point` lacks the second "r" of "isoelectric"; presumably this
+ // matches the method as declared elsewhere - confirm before renaming anything.
+ fn test_isoelectric_point>(cases: &[(&str, Option)]) {
+ for &(seq, expected) in cases {
+ let peptide = create_peptidoform(seq);
+ let round = |v: f64| (v * 100.0).round() / 100.0;
+ let iso = peptide.isoelectic_point::();
+ assert_eq!(
+ iso.map(round),
+ expected,
+ "Isoelectric point mismatch for peptide: {seq}"
+ );
+ }
+ }
+
+ // Spot-checks pKa lookups through `test_pka`: four amino acids whose expected values
+ // mirror the Lide 1991 table entries, plus `AminoAcid::Unknown`, which is listed with no
+ // expected entry.
+ // NOTE(review): the turbofish argument of `test_pka::(..)` is missing in this copy of the
+ // patch; presumably `PKaLide1991` - confirm.
+ #[test]
+ fn test_pka_lide1991() {
+ let test_cases = [
+ (AminoAcid::Arginine, Some((9.00, Some(12.10), 2.03))),
+ (AminoAcid::GlutamicAcid, Some((9.58, Some(4.15), 2.16))),
+ (AminoAcid::Alanine, Some((9.71, None, 2.33))),
+ (AminoAcid::Histidine, Some((9.09, Some(6.04), 1.70))),
+ (AminoAcid::Unknown, None),
+ ];
+
+ test_pka::(&test_cases);
+ }
+
+ // Spot-checks pKa lookups through `test_pka`: four amino acids whose expected values
+ // mirror the Lehninger table entries, plus `AminoAcid::Selenocysteine`, which is listed
+ // with no expected entry.
+ // NOTE(review): the turbofish argument of `test_pka::(..)` is missing in this copy of the
+ // patch; presumably `PKaLehninger` - confirm.
+ #[test]
+ fn test_pka_lehninger() {
+ let test_cases = [
+ (AminoAcid::Cysteine, Some((10.28, Some(8.18), 1.96))),
+ (AminoAcid::AsparticAcid, Some((9.60, Some(3.65), 1.88))),
+ (AminoAcid::Isoleucine, Some((9.68, None, 2.36))),
+ (AminoAcid::Tryptophan, Some((9.39, None, 2.38))),
+ (AminoAcid::Selenocysteine, None),
+ ];
+
+ test_pka::(&test_cases);
+ }
+
+ // Checks isoelectric points, rounded to two decimals, for single residues and short
+ // peptides, plus two sequences for which no value is expected.
+ // NOTE(review): the turbofish argument of `test_isoelectric_point::(..)` is missing in
+ // this copy of the patch; presumably `PKaLide1991` - confirm.
+ // NOTE(review): "AXRT[Oxidation]" has the same residues as "AXRT", which is already
+ // expected to give `None`, so this case cannot show that the modification on its own leads
+ // to `None` - consider a modified peptide built only from residues that have pKa entries.
+ #[test]
+ fn test_isoelectric_point_lide1991() {
+ let test_cases = [
+ ("E", Some(3.16)),
+ ("A", Some(6.02)),
+ ("DE", Some(2.85)),
+ ("HR", Some(10.6)),
+ ("KDEH", Some(5.17)),
+ ("AXRT", None),
+ ("AXRT[Oxidation]", None),
+ ];
+
+ test_isoelectric_point::(&test_cases);
+ }
+
+ // Checks isoelectric points, rounded to two decimals, for single residues and short
+ // peptides, plus two sequences for which no value is expected.
+ // NOTE(review): the turbofish argument of `test_isoelectric_point::(..)` is missing in
+ // this copy of the patch; presumably `PKaLehninger` - confirm.
+ // NOTE(review): "TK[Oxidation]LB" has the same residues as "TKLB", which is already
+ // expected to give `None`, so this case cannot show that the modification on its own leads
+ // to `None` - consider a modified peptide built only from residues that have pKa entries.
+ #[test]
+ fn test_isoelectric_point_lehninger() {
+ let test_cases = [
+ ("G", Some(5.97)),
+ ("Y", Some(5.65)),
+ ("CQ", Some(6.23)),
+ ("KP", Some(9.74)),
+ ("FIVS", Some(5.67)),
+ ("TKLB", None),
+ ("TK[Oxidation]LB", None),
+ ];
+
+ test_isoelectric_point::(&test_cases);
+ }
+}
diff --git a/rustyms/src/aminoacid_properties.rs b/rustyms/src/aminoacid/properties.rs
similarity index 100%
rename from rustyms/src/aminoacid_properties.rs
rename to rustyms/src/aminoacid/properties.rs
diff --git a/rustyms/src/checked_aminoacid.rs b/rustyms/src/checked_aminoacid.rs
index f709c828..c38a6240 100644
--- a/rustyms/src/checked_aminoacid.rs
+++ b/rustyms/src/checked_aminoacid.rs
@@ -3,7 +3,8 @@ use std::marker::PhantomData;
use serde::{Deserialize, Serialize};
use crate::{
- AminoAcid, Chemical, MolecularFormula, Multi, MultiChemical, SemiAmbiguous, UnAmbiguous,
+ AminoAcid, Chemical, IsAminoAcid, MolecularFormula, Multi, MultiChemical, SemiAmbiguous,
+ UnAmbiguous,
};
/// A checked amino acid. This wraps an [`AminoAcid`] to keep track of the maximal complexity of
@@ -279,24 +280,48 @@ impl CheckedAminoAcid {
self.aminoacid.canonical_identical(rhs.aminoacid)
}
- /// Get the description of the amino acid as a single character
- pub const fn char(self) -> char {
- self.aminoacid.char()
+ /// Get the underlying (unchecked) amino acid
+ pub const fn aminoacid(self) -> AminoAcid {
+ self.aminoacid
}
+}
- /// Get the 3 letter code for the amino acid
- pub const fn code(self) -> &'static str {
- self.aminoacid.code()
+// Every trait method below forwards to the same-named method on the wrapped amino acid
+// (`self.aminoacid`), passing any arguments through unchanged.
+// NOTE(review): some return types look truncated in this copy of the patch (e.g. `Option> {`)
+// - restore the generic arguments from the upstream file.
+impl IsAminoAcid for CheckedAminoAcid {
+ fn name(&self) -> std::borrow::Cow<'_, str> {
+ self.aminoacid.name()
}
- /// Get the full name of the amino acid
- pub const fn name(self) -> &'static str {
- self.aminoacid.name()
+ fn three_letter_code(&self) -> Option> {
+ self.aminoacid.three_letter_code()
}
- /// Get the underlying (unchecked) amino acid
- pub const fn aminoacid(self) -> AminoAcid {
+ fn one_letter_code(&self) -> Option {
+ self.aminoacid.one_letter_code()
+ }
+
+ fn pro_forma_definition(&self) -> std::borrow::Cow<'_, str> {
+ self.aminoacid.pro_forma_definition()
+ }
+
+ fn immonium_losses(&self) -> std::borrow::Cow<'_, [crate::NeutralLoss]> {
+ self.aminoacid.immonium_losses()
+ }
+
+ fn satellite_ion_fragments(
+ &self,
+ sequence_index: crate::SequencePosition,
+ peptidoform_index: usize,
+ ) -> Option>> {
self.aminoacid
+ .satellite_ion_fragments(sequence_index, peptidoform_index)
+ }
+
+ fn side_chain(
+ &self,
+ sequence_index: crate::SequencePosition,
+ peptidoform_index: usize,
+ ) -> std::borrow::Cow<'_, Multi> {
+ self.aminoacid.side_chain(sequence_index, peptidoform_index)
}
}
@@ -398,7 +423,7 @@ impl Default for CheckedAminoAcid {
impl std::fmt::Display for CheckedAminoAcid {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- write!(f, "{}", self.char())
+ // Display writes whatever `pro_forma_definition()` returns for this amino acid.
+ write!(f, "{}", self.pro_forma_definition())
}
}
diff --git a/rustyms/src/element.rs b/rustyms/src/element.rs
index 015d8def..451b6af0 100644
--- a/rustyms/src/element.rs
+++ b/rustyms/src/element.rs
@@ -82,7 +82,7 @@ impl Element {
let mut max = None;
for iso in &elemental_data()[self as usize - 1].2 {
let chance = iso.2 * f64::from(n);
- if max.map_or(true, |m: (Mass, f64)| chance > m.1) {
+ if max.is_none_or(|m: (Mass, f64)| chance > m.1) {
max = Some((iso.1, chance));
}
}
diff --git a/rustyms/src/fragment.rs b/rustyms/src/fragment.rs
index cfd67b9a..890965ad 100644
--- a/rustyms/src/fragment.rs
+++ b/rustyms/src/fragment.rs
@@ -607,9 +607,7 @@ impl FragmentType {
Self::z(_) => Cow::Borrowed("z"),
Self::z·(_) => Cow::Borrowed("z·"),
Self::Y(_) | Self::YComposition(_, _) => Cow::Borrowed("Y"),
- Self::Diagnostic(DiagnosticPosition::Peptide(_, aa)) => {
- Cow::Owned(format!("d{}", aa.char()))
- }
+ Self::Diagnostic(DiagnosticPosition::Peptide(_, aa)) => Cow::Owned(format!("d{aa}")),
Self::Diagnostic(DiagnosticPosition::Reporter) => Cow::Borrowed("r"),
Self::Diagnostic(DiagnosticPosition::Labile(m)) => Cow::Owned(format!("d{m}")),
Self::Diagnostic(
diff --git a/rustyms/src/identification/identified_peptide.rs b/rustyms/src/identification/identified_peptide.rs
index 054ee678..cfa7beae 100644
--- a/rustyms/src/identification/identified_peptide.rs
+++ b/rustyms/src/identification/identified_peptide.rs
@@ -506,9 +506,9 @@ impl IdentifiedPeptide {
precursor_mz: mz, ..
})
| MetaData::MSFragger(MSFraggerData { mz, .. }) => Some(*mz),
- MetaData::MZTab(MZTabData { mz, .. }) | MetaData::MaxQuant(MaxQuantData { mz, .. }) => {
- *mz
- }
+ MetaData::MZTab(MZTabData { mz, .. })
+ | MetaData::MaxQuant(MaxQuantData { mz, .. })
+ | MetaData::DeepNovoFamily(DeepNovoFamilyData { mz, .. }) => *mz,
MetaData::Sage(SageData { mass, z, .. })
| MetaData::NovoB(NovoBData { mass, z, .. })
| MetaData::PLink(PLinkData { mass, z, .. }) => {
@@ -516,8 +516,7 @@ impl IdentifiedPeptide {
mass.value / (z.value as f64),
))
}
- MetaData::DeepNovoFamily(_)
- | MetaData::Fasta(_)
+ MetaData::Fasta(_)
| MetaData::SpectrumSequenceList(_)
| MetaData::PowerNovo(_)
| MetaData::PepNet(_) => None,
diff --git a/rustyms/src/lib.rs b/rustyms/src/lib.rs
index 96d410f9..8814140b 100644
--- a/rustyms/src/lib.rs
+++ b/rustyms/src/lib.rs
@@ -34,8 +34,7 @@ mod formula;
#[path = "shared/csv.rs"]
pub mod csv;
-pub mod aminoacid_properties;
-mod aminoacids;
+pub mod aminoacid;
mod checked_aminoacid;
mod element;
pub mod error;
@@ -85,7 +84,7 @@ pub use crate::sequence_element::SequenceElement;
pub use crate::sequence_position::*;
pub use crate::spectrum::{AnnotatableSpectrum, AnnotatedSpectrum, RawSpectrum};
pub use crate::tolerance::*;
-pub use aminoacids::AminoAcid;
+pub use aminoacid::{AminoAcid, IsAminoAcid};
pub use checked_aminoacid::CheckedAminoAcid;
pub use fragment::Fragment;
pub use peptidoform::{CompoundPeptidoformIon, Peptidoform, PeptidoformIon};
diff --git a/rustyms/src/sequence_element.rs b/rustyms/src/sequence_element.rs
index e53ce32a..9c843973 100644
--- a/rustyms/src/sequence_element.rs
+++ b/rustyms/src/sequence_element.rs
@@ -117,7 +117,7 @@ impl SequenceElement {
if self.ambiguous.is_some() && last_ambiguous != self.ambiguous {
write!(f, "(?")?;
}
- write!(f, "{}", self.aminoacid.char())?;
+ write!(f, "{}", self.aminoacid)?;
for m in &self.modifications {
let mut display_ambiguous = false;
if let Modification::Ambiguous { id, .. } = m {