diff --git a/rustyms/src/identification/identified_peptide.rs b/rustyms/src/identification/identified_peptide.rs index 03cdbf4..4601234 100644 --- a/rustyms/src/identification/identified_peptide.rs +++ b/rustyms/src/identification/identified_peptide.rs @@ -5,7 +5,8 @@ use serde::{Deserialize, Serialize}; use super::{ deepnovofamily::DeepNovoFamilyData, fasta::FastaData, novor::NovorData, opair::OpairData, - peaks::PeaksData, system::MassOverCharge, MSFraggerData, MZTabData, MaxQuantData, SageData, + peaks::PeaksData, plink::PLinkData, system::MassOverCharge, MSFraggerData, MZTabData, + MaxQuantData, SageData, }; use crate::{ @@ -41,6 +42,8 @@ pub enum MetaData { Opair(OpairData), /// Peaks metadata Peaks(PeaksData), + /// pLink metadata + PLink(PLinkData), /// Sage metadata Sage(SageData), } @@ -58,6 +61,7 @@ impl IdentifiedPeptide { | MetaData::MaxQuant(MaxQuantData { peptide, .. }) | MetaData::DeepNovoFamily(DeepNovoFamilyData { peptide, .. }) => peptide.as_ref(), MetaData::Fasta(f) => Some(f.peptide()), + MetaData::PLink(_) => None, //TODO: fix in one way or another } } @@ -72,6 +76,7 @@ impl IdentifiedPeptide { MetaData::Novor(_) => "Novor", MetaData::Opair(_) => "OPair", MetaData::Peaks(_) => "PEAKS", + MetaData::PLink(_) => "pLink", MetaData::Sage(_) => "Sage", } } @@ -87,6 +92,7 @@ impl IdentifiedPeptide { MetaData::Novor(NovorData { version, .. }) => version.to_string(), MetaData::Opair(OpairData { version, .. }) => version.to_string(), MetaData::Peaks(PeaksData { version, .. }) => version.to_string(), + MetaData::PLink(PLinkData { version, .. }) => version.to_string(), MetaData::Sage(SageData { version, .. }) => version.to_string(), } } @@ -103,6 +109,7 @@ impl IdentifiedPeptide { } MetaData::Fasta(f) => f.identifier().accession().to_string(), MetaData::MSFragger(MSFraggerData { scan, .. }) => scan.to_string(), + MetaData::PLink(PLinkData { order, .. }) => order.to_string(), MetaData::MaxQuant(MaxQuantData { id, scan, .. 
}) => { id.map_or_else(|| scan.iter().join(";"), |id| id.to_string()) } @@ -137,6 +144,7 @@ impl IdentifiedPeptide { | MetaData::Sage(SageData { z, .. }) | MetaData::MSFragger(MSFraggerData { z, .. }) | MetaData::MaxQuant(MaxQuantData { z, .. }) + | MetaData::PLink(PLinkData { z, .. }) | MetaData::MZTab(MZTabData { z, .. }) => Some(*z), MetaData::DeepNovoFamily(DeepNovoFamilyData { z, .. }) => *z, MetaData::Fasta(_) => None, @@ -162,7 +170,7 @@ impl IdentifiedPeptide { MetaData::MaxQuant(MaxQuantData { rt, .. }) | MetaData::Novor(NovorData { rt, .. }) | MetaData::MZTab(MZTabData { rt, .. }) => *rt, - MetaData::DeepNovoFamily(_) | MetaData::Fasta(_) => None, + MetaData::DeepNovoFamily(_) | MetaData::Fasta(_) | MetaData::PLink(_) => None, } } @@ -214,6 +222,22 @@ impl IdentifiedPeptide { |raw_file| SpectrumIds::FileKnown(vec![(raw_file, vec![scan.clone()])]), ) } + MetaData::PLink(PLinkData { + raw_file, + scan, + title, + .. + }) => scan.map_or_else( + || SpectrumIds::FileNotKnown(vec![SpectrumId::Native(title.clone())]), + |scan| { + raw_file.clone().map_or_else( + || SpectrumIds::FileNotKnown(vec![SpectrumId::Index(scan)]), + |raw_file| { + SpectrumIds::FileKnown(vec![(raw_file, vec![SpectrumId::Index(scan)])]) + }, + ) + }, + ), MetaData::Sage(SageData { raw_file, scan, .. }) => { SpectrumIds::FileKnown(vec![(raw_file.clone(), vec![scan.clone()])]) } @@ -231,13 +255,12 @@ impl IdentifiedPeptide { MetaData::MZTab(MZTabData { mz, .. }) | MetaData::MaxQuant(MaxQuantData { mz, .. }) => { *mz } - MetaData::Sage(SageData { - mass: experimental_mass, - z, - .. - }) => Some(MassOverCharge::new::( - experimental_mass.value / (z.value as f64), - )), + MetaData::Sage(SageData { mass, z, .. }) + | MetaData::PLink(PLinkData { mass, z, .. }) => { + Some(MassOverCharge::new::( + mass.value / (z.value as f64), + )) + } MetaData::DeepNovoFamily(_) | MetaData::Fasta(_) => None, } } @@ -249,6 +272,7 @@ impl IdentifiedPeptide { | MetaData::Novor(NovorData { mass, .. 
}) | MetaData::Opair(OpairData { mass, .. }) | MetaData::MSFragger(MSFraggerData { mass, .. }) + | MetaData::PLink(PLinkData { mass, .. }) | MetaData::Sage(SageData { mass, .. }) => Some(*mass), MetaData::MaxQuant(MaxQuantData { mass, .. }) => *mass, MetaData::MZTab(MZTabData { mz, z, .. }) => mz.map(|mz| mz * z.to_float()), diff --git a/rustyms/src/identification/mod.rs b/rustyms/src/identification/mod.rs index 1561567..bb2d3e3 100644 --- a/rustyms/src/identification/mod.rs +++ b/rustyms/src/identification/mod.rs @@ -14,6 +14,7 @@ mod mztab; mod novor; mod opair; mod peaks; +mod plink; mod sage; use crate::*; @@ -28,6 +29,7 @@ pub use mztab::*; pub use novor::*; pub use opair::*; pub use peaks::*; +pub use plink::*; pub use sage::*; #[cfg(test)] @@ -45,4 +47,6 @@ mod opair_tests; #[cfg(test)] mod peaks_tests; #[cfg(test)] +mod plink_tests; +#[cfg(test)] mod sage_tests; diff --git a/rustyms/src/identification/plink.rs b/rustyms/src/identification/plink.rs new file mode 100644 index 0000000..3da8462 --- /dev/null +++ b/rustyms/src/identification/plink.rs @@ -0,0 +1,405 @@ +use std::{ops::Range, path::PathBuf}; + +use crate::{ + error::{Context, CustomError}, + helper_functions::{explain_number_error, InvertResult}, + identification::{ + common_parser::{Location, OptionalColumn, OptionalLocation}, + csv::{parse_csv, CsvLine}, + modification::SimpleModification, + BoxedIdentifiedPeptideIter, IdentifiedPeptide, IdentifiedPeptideSource, MetaData, + Modification, + }, + molecular_formula, + ontologies::CustomDatabase, + system::{usize::Charge, Mass}, + CrossLinkName, LinearPeptide, Peptidoform, SequencePosition, SloppyParsingParameters, +}; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; + +static NUMBER_ERROR: (&str, &str) = ( + "Invalid pLink line", + "This column is not a number but it is required to be a number in this pLink format", +); +static TYPE_ERROR: (&str, &str) = ( + "Invalid pLink peptide type", + "This column is not a valid paptide type 
but it is required to be one of 0/1/2/3 in this pLink format", +); + +format_family!( + /// The format for any pLink file + PLinkFormat, + /// The data from any pLink file + PLinkData, + PLinkVersion, [&V2_3], b','; + required { + order: usize, |location: Location, _| location.parse::(NUMBER_ERROR); + title: String, |location: Location, _| Ok(location.get_string()); + z: Charge, |location: Location, _| location.parse::(NUMBER_ERROR).map(Charge::new::); + /// Experimental MH+ mass (the `precursor_mh` column) + mass: Mass, |location: Location, _| location.parse::(NUMBER_ERROR).map(Mass::new::); + /// Theoretical MH+ mass (the `peptide_mh` column) + theoretical_mass: Mass, |location: Location, _| location.parse::(NUMBER_ERROR).map(Mass::new::); + peptide_type: PLinkPeptideType, |location: Location, _| location.parse::(TYPE_ERROR); + peptidoform: Peptidoform, |location: Location, _| { + match plink_separate(location.clone(), "peptide")? { + (pep1, Some(pos1), Some(pep2), Some(pos2)) => { + let pep1 = LinearPeptide::sloppy_pro_forma(location.full_line(), pep1, None, &SloppyParsingParameters::default())?; + let pep2 = LinearPeptide::sloppy_pro_forma(location.full_line(), pep2, None, &SloppyParsingParameters::default())?; + + let mut peptidoform = Peptidoform::new(vec![pep1, pep2]).unwrap(); + peptidoform.add_cross_link( + (0, SequencePosition::Index(pos1.0.saturating_sub(1))), // pLink positions are 1-based, sequence indices are 0-based + (1, SequencePosition::Index(pos2.0.saturating_sub(1))), + SimpleModification::Mass(Mass::default().into()), + CrossLinkName::Name("1".to_string()), + ); + Ok(peptidoform) + } + (pep1, Some(pos1), None, Some(pos2)) => { + let pep = LinearPeptide::sloppy_pro_forma(location.full_line(), pep1, None, &SloppyParsingParameters::default())?; + + let mut peptidoform = Peptidoform::new(vec![pep]).unwrap(); + peptidoform.add_cross_link( + (0, SequencePosition::Index(pos1.0.saturating_sub(1))), // pLink positions are 1-based, sequence indices are 0-based + (0, SequencePosition::Index(pos2.0.saturating_sub(1))), + SimpleModification::Mass(Mass::default().into()), + CrossLinkName::Name("1".to_string()), + ); + Ok(peptidoform) + } + (pep1, Some(pos1), None, None) => { + let mut pep = 
LinearPeptide::sloppy_pro_forma(location.full_line(), pep1, None, &SloppyParsingParameters::default())?; + pep[SequencePosition::Index(pos1.0.saturating_sub(1))].modifications.push(SimpleModification::Mass(Mass::default().into()).into()); // pLink positions are 1-based + + Ok(Peptidoform::new(vec![pep]).unwrap()) + } + (pep1, None, None, None) => { + let pep = LinearPeptide::sloppy_pro_forma(location.full_line(), pep1, None, &SloppyParsingParameters::default())?; + + Ok(Peptidoform::new(vec![pep]).unwrap()) + } + _ => unreachable!() + } + }; + /// All modifications with their attachment, and their 0-based index (into the full peptidoform, so anything bigger than the first peptide matches in the second) + ptm: Vec<(SimpleModification, ModificationPosition, usize)>, |location: Location, custom_database: Option<&CustomDatabase>| + location.ignore("null").array(';').map(|v| { + let v = v.trim(); + let position_start = v.as_str().rfind('(').ok_or_else(|| + CustomError::error( + "Invalid pLink modification", + "A pLink modification should follow the format 'Modification[AA](pos)' but the opening bracket '(' was not found", + v.context()))?; + let location_start = v.as_str().rfind('[').ok_or_else(|| + CustomError::error( + "Invalid pLink modification", + "A pLink modification should follow the format 'Modification[AA](pos)' but the opening square bracket '[' was not found", + v.context()))?; + let position = v.full_line()[v.location.start+position_start+1..v.location.end-1].parse::().map_err(|err| + CustomError::error( + "Invalid pLink modification", + format!("A pLink modification should follow the format 'Modification[AA](pos)' but the position number {}", explain_number_error(&err)), + v.context()))?; + let location = v.full_line()[v.location.start+location_start+1..v.location.start+position_start-1].parse::().unwrap(); + + Ok((Modification::sloppy_modification(v.full_line(), v.location.start..v.location.start+location_start, None, custom_database)?, location, position - 1)) + } + ).collect::,_>>(); + refined_score: f64, 
|location: Location, _| location.parse::(NUMBER_ERROR); + svm_score: f64, |location: Location, _| location.parse::(NUMBER_ERROR); + score: f64, |location: Location, _| location.parse::(NUMBER_ERROR); + e_value: f64, |location: Location, _| location.parse::(NUMBER_ERROR); + /// Whether this peptide is a target (false) or decoy (true) peptide + is_decoy: bool, |location: Location, _| Ok(location.as_str() == "1"); + q_value: f64, |location: Location, _| location.parse::(NUMBER_ERROR); + proteins: Vec<(String, Option, Option, Option)>, |location: Location, _| { + location.array('/').filter(|l| !l.as_str().trim().is_empty()).map(|l| { // skip the empty entry after the trailing '/' + let separated = plink_separate(l.clone(), "protein")?; + + Ok((l.full_line()[separated.0].trim().to_string(), separated.1.map(|(a, _)| a), separated.2.map(|p| l.full_line()[p].trim().to_string()), separated.3.map(|(a, _)| a))) + }) + .collect::, _>>() + }; + /// If true this indicates that this cross-link binds two different proteins + is_different_protein: bool, |location: Location, _| Ok(location.as_str() == "1"); + raw_file_id: usize, |location: Location, _| location.parse::(NUMBER_ERROR); + is_complex_satisfied: bool, |location: Location, _| Ok(location.as_str() == "1"); + /// Whether this fits within the normal filters applied within pLink + is_filter_in: bool, |location: Location, _| Ok(location.as_str() == "1"); + } + optional { + scan: usize, |location: Location, _| location.parse::(NUMBER_ERROR); + raw_file: PathBuf, |location: Location, _| Ok(Some(location.get_string().into())); + } +); + +fn plink_separate( + location: Location<'_>, + field: &'static str, +) -> Result< + ( + Range, + Option<(usize, Range)>, + Option>, + Option<(usize, Range)>, + ), + CustomError, +> { + let title = format!("Invalid pLink {field}"); + if let Some((peptide1, peptide2)) = location.as_str().split_once(")-") { + let first_end = peptide1.rfind('(').ok_or_else(|| + CustomError::error( + &title, + format!("A pLink {field} should follow the format 
'PEP1(pos1)-PEP2(pos2)' but the opening bracket '(' was not found for PEP1"), + Context::line(Some(location.line.line_index()), location.full_line(), location.location.start, peptide1.len())))?; + let second_end = peptide2.rfind('(').ok_or_else(|| + CustomError::error( + &title, + format!("A pLink {field} should follow the format 'PEP1(pos1)-PEP2(pos2)' but the opening bracket '(' was not found for PEP2"), + Context::line(Some(location.line.line_index()), location.full_line(), location.location.start+peptide1.len()+2, peptide2.len())))?; + + let pos1 = + location.location.start + first_end + 1..location.location.start + peptide1.len(); + let first_index = location.full_line()[pos1.clone()].parse::().map_err(|err| + CustomError::error( + &title, + format!("A pLink {field} should follow the format 'PEP1(pos1)-PEP2(pos2)' but the position for PEP1 {}", explain_number_error(&err)), + Context::line_range(Some(location.line.line_index()), location.full_line(), pos1.clone())))?; + let pos2 = location.location.start + peptide1.len() + 2 + second_end + 1 + ..location.location.start + peptide1.len() + 2 + peptide2.len() - 1; + let second_index = location.full_line()[pos2.clone()].parse::().map_err(|err| + CustomError::error( + &title, + format!("A pLink {field} should follow the format 'PEP1(pos1)-PEP2(pos2)' but the position for PEP2 {}", explain_number_error(&err)), + Context::line_range(Some(location.line.line_index()), location.full_line(), pos2.clone())))?; + + Ok(( + location.location.start..location.location.start + first_end, + Some((first_index, pos1)), + Some( + location.location.start + peptide1.len() + 2 + ..location.location.start + peptide1.len() + 2 + second_end, + ), + Some((second_index, pos2)), + )) + } else { + // rsplit to prevent picking a bracket in the text field, and then reverse for it to make sense to human brains + let mut split = location.as_str().rsplitn(3, '(').collect_vec(); + split.reverse(); + + match split.len() { + 3 => { + let start = 
location.location.start; + let start_pos1 = start + split[0].len() + 1; + let start_pos2 = start_pos1 + split[1].len() + 1; + let end = location.location.end; + + let pos1 = start_pos1..start_pos2 - 2; + let first_index = location.full_line()[pos1.clone()].parse::().map_err(|err| + CustomError::error( + &title, + format!("A pLink {field} should follow the format 'PEP(pos1)(pos2)' but the first position {}", explain_number_error(&err)), + Context::line_range(Some(location.line.line_index()), location.full_line(), pos1.clone())))?; + let pos2 = start_pos2..end - 1; + let second_index = location.full_line()[start_pos2..end-1].parse::().map_err(|err| + CustomError::error( + &title, + format!("A pLink {field} should follow the format 'PEP(pos1)(pos2)' but the second position {}", explain_number_error(&err)), + Context::line_range(Some(location.line.line_index()), location.full_line(), start_pos2..end-1)))?; + + Ok(( + start..start_pos1 - 1, + Some((first_index, pos1)), + None, + Some((second_index, pos2)), + )) + } + 2 => { + let start = location.location.start; + let start_pos1 = start + split[0].len() + 1; + let end = location.location.end; + + let pos1 = start_pos1..end - 1; + let first_index = location.full_line()[pos1.clone()].parse::().map_err(|err| + CustomError::error( + &title, + format!("A pLink {field} should follow the format 'PEP(pos1)(pos2)' but the first position {}", explain_number_error(&err)), + Context::line_range(Some(location.line.line_index()), location.full_line(), pos1.clone())))?; + + Ok((start..start_pos1 - 1, Some((first_index, pos1)), None, None)) + } + 1 => Ok((location.location.clone(), None, None, None)), + _ => unreachable!(), + } + } +} + +/// The Regex to match against pLink title fields +static IDENTIFER_REGEX: std::sync::OnceLock = std::sync::OnceLock::new(); + +#[allow(clippy::fallible_impl_from)] // Is not fallible but not guarenteed by the compiler +impl From for IdentifiedPeptide { + fn from(mut value: PLinkData) -> Self { + // 
Add all modifications + for (m, pos, index) in &value.ptm { + match pos { + ModificationPosition::NTerm => { + if *index == 0 { + value.peptidoform.peptides_mut()[0].set_simple_n_term(Some(m.clone())); + } else if value.peptidoform.peptides().len() > 1 + && *index == value.peptidoform.peptides()[0].len() + { + value.peptidoform.peptides_mut()[1].set_simple_n_term(Some(m.clone())); + } + } + ModificationPosition::CTerm => { + if *index == value.peptidoform.peptides()[0].len() - 1 { + value.peptidoform.peptides_mut()[0].set_simple_c_term(Some(m.clone())); + } else if value.peptidoform.peptides().len() > 1 + && *index + == value.peptidoform.peptides()[0].len() + + value.peptidoform.peptides()[1].len() + - 1 + { + value.peptidoform.peptides_mut()[1].set_simple_c_term(Some(m.clone())); + } + } + ModificationPosition::Sidechain => { + let l0 = value.peptidoform.peptides()[0].len(); + if *index < l0 { + value.peptidoform.peptides_mut()[0][SequencePosition::Index(*index)] + .modifications + .push(m.clone().into()); + } else if value.peptidoform.peptides().len() > 1 + && *index < l0 + value.peptidoform.peptides()[1].len() + { + value.peptidoform.peptides_mut()[1][SequencePosition::Index(index - l0)] + .modifications + .push(m.clone().into()); + } + } + } + } + + // Find linker based on left over mass + let _left_over = value.theoretical_mass + - molecular_formula!(H 1 Electron -1).monoisotopic_mass() + - value + .peptidoform + .formulas() + .first() + .unwrap() + .monoisotopic_mass(); + // TODO: the left over mass is computed but not yet used to identify the linker + + if let Some(m) = IDENTIFER_REGEX + .get_or_init(|| regex::Regex::new(r"([^/]+)\.(\d+)\.\d+.\d+.\d+.\w+").unwrap()) + .captures(&value.title) + { + value.raw_file = Some(m.get(1).unwrap().as_str().into()); + value.scan = Some(m.get(2).unwrap().as_str().parse::().unwrap()); + } + + Self { + score: Some(1.0 - value.score), + metadata: MetaData::PLink(value), + } + } +} + +/// The different types of peptides a cross-link experiment can result in +#[derive(Clone, Eq, 
PartialEq, Ord, PartialOrd, Hash, Debug, Default, Serialize, Deserialize)] +pub enum PLinkPeptideType { + #[default] + /// No cross-linkers + Common, + /// A cross-linker, but hydrolysed/monolinker + Hydrolysed, + /// A cross-linker binding to the same peptide in a loop + LoopLink, + /// A cross-linker binding to a different peptide (although the peptide can be identical) + IntraLink, +} + +impl std::str::FromStr for PLinkPeptideType { + type Err = (); + fn from_str(s: &str) -> Result { + match s { + "0" => Ok(Self::Common), + "1" => Ok(Self::Hydrolysed), + "2" => Ok(Self::LoopLink), + "3" => Ok(Self::IntraLink), + _ => Err(()), + } + } +} + +/// The position a pLink modification is attached to: either terminus or a side chain +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Serialize, Deserialize)] +pub enum ModificationPosition { + /// Any N terminal + NTerm, + /// Any C terminal + CTerm, + /// Any side chain + Sidechain, +} + +impl std::str::FromStr for ModificationPosition { + type Err = (); + fn from_str(s: &str) -> Result { + match s { + "ProteinN-term" | "PeptideN-term" => Ok(Self::NTerm), + "ProteinC-term" | "PeptideC-term" => Ok(Self::CTerm), + _ => Ok(Self::Sidechain), + } + } +} + +/// The only built in version of pLink export +pub const V2_3: PLinkFormat = PLinkFormat { + version: PLinkVersion::V2_3, + order: "order", + title: "title", + z: "charge", + mass: "precursor_mh", + peptide_type: "peptide_type", + peptidoform: "peptide", + theoretical_mass: "peptide_mh", + ptm: "modifications", + refined_score: "refined_score", + svm_score: "svm_score", + score: "score", + e_value: "e-value", + is_decoy: "target_decoy", + q_value: "q-value", + proteins: "proteins", + is_different_protein: "protein_type", + raw_file_id: "fileid", + is_complex_satisfied: "iscomplexsatisfied", + is_filter_in: "isfilterin", + scan: OptionalColumn::NotAvailable, + raw_file: OptionalColumn::NotAvailable, +}; + +/// All possible pLink versions +#[derive(Clone, Eq, PartialEq, Ord, 
PartialOrd, Hash, Debug, Default, Serialize, Deserialize)] +pub enum PLinkVersion { + /// Built for pLink version 2.3.11, likely works more broadly + #[default] + V2_3, +} + +impl std::fmt::Display for PLinkVersion { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> { + write!( + f, + "{}", + match self { + Self::V2_3 => "v2.3", + } + ) + } +} diff --git a/rustyms/src/identification/plink_tests.rs b/rustyms/src/identification/plink_tests.rs new file mode 100644 index 0000000..94dc4dd --- /dev/null +++ b/rustyms/src/identification/plink_tests.rs @@ -0,0 +1,44 @@ +#![allow(clippy::missing_panics_doc)] +use std::io::BufReader; + +use crate::identification::{test_format, PLinkData, PLinkVersion}; + +#[test] +fn plink() { + match test_format::( + BufReader::new(DATA_V2_3.as_bytes()), + None, + false, + false, + Some(PLinkVersion::V2_3), + ) { + Ok(n) => assert_eq!(n, 21), + Err(e) => { + println!("{e}"); + panic!("Failed identified peptides test"); + } + } +} + +const DATA_V2_3: &str = r"Order,Title,Charge,Precursor_MH,Peptide_Type,Peptide,Peptide_MH,Modifications,Refined_Score,SVM_Score,Score,E-value,Precursor_Mass_Error(Da),Precursor_Mass_Error(ppm),Target_Decoy,Q-value,Proteins,Protein_Type,FileID,isComplexSatisfied,isFilterIn +1,20240205_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT2_F16.19136.19136.3.0.dta,3,1642.831563,2,IDPEKLSVNSHFMK(2)(5),1642.825678,Oxidation[M](13),199.200765,107.603000,1.856141e-047,1.000000e+000,0.005885,3.582243,2,0.000000,sp|Q9CR21|ACPM_MOUSE (26)(29)/,0,121,1,1 +2,20240205_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT2_F16.19593.19593.3.0.dta,3,1642.835591,2,IDPEKLSVNSHFMK(4)(5),1642.825678,Oxidation[M](13),175.123247,106.257000,7.131342e-047,1.000000e+000,0.009913,6.034116,2,0.000000,sp|Q9CR21|ACPM_MOUSE (28)(29)/,0,121,1,1 
+3,20240205_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT2_F16.24008.24008.3.0.dta,3,1626.832670,2,IDPEKLSVNSHFMK(4)(5),1626.830764,null,175.761841,95.988800,2.053969e-042,1.000000e+000,0.001906,1.171603,2,0.000000,sp|Q9CR21|ACPM_MOUSE (28)(29)/,0,121,1,1 +4,20240205_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT2_F16.18408.18408.3.0.dta,3,2952.490051,2,AAPAPAAAPAAAPAAAPEPERPKEAEFDASK(20)(23),2952.484874,null,125.624997,90.802700,3.671880e-040,1.000000e+000,0.005177,1.753438,2,0.000000,sp|P09542|MYL3_MOUSE (39)(42)/,0,121,1,1 +5,20240206_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT3_F20.9089.9089.3.0.dta,3,2146.077031,2,RIKVEKPVVEMDGDEMTR(3)(5),2146.062997,Oxidation[M](11);Oxidation[M](16),162.571366,86.276000,3.394762e-038,1.000000e+000,0.014034,6.539417,2,0.000000,sp|P54071|IDHP_MOUSE (6)(8)/,0,138,1,1 +6,20240206_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT3_F20.10653.10653.3.0.dta,3,1726.830769,2,RKEEEHMIDWVEK(2)(3),1726.821652,Oxidation[M](7),218.639779,86.134100,3.912332e-038,1.000000e+000,0.009117,5.279642,2,0.000000,sp|Q9CQQ7|AT5F1_MOUSE (177)(178)/,0,138,1,1 +7,20240206_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_KO3_F19.28486.28486.3.0.dta,3,1889.927362,2,EEEHMIDWVEKHVVK(10)(11),1889.921360,null,212.478290,84.415800,2.181142e-037,1.000000e+000,0.006002,3.175794,2,0.000000,sp|Q9CQQ7|AT5F1_MOUSE (187)(188)/,0,63,1,1 +8,20240205_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT2_F16.23369.23369.3.0.dta,3,1626.832913,2,IDPEKLSVNSHFMK(2)(5),1626.830764,null,156.477937,83.690600,4.504372e-037,1.000000e+000,0.002149,1.320973,2,0.000000,sp|Q9CR21|ACPM_MOUSE (26)(29)/,0,121,1,1 +9,20240206_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_KO3_F19.29975.29975.4.0.dta,4,3026.492869,2,GGAEVQIFAPDVPQMHVIDHTKGEPSER(22)(24),3026.478740,null,148.182008,83.553700,5.165224e-037,1.000000e+000,0.014129,4.668462,2,0.000000,sp|Q9D172|GAL3A_MOUSE (53)(55)/,0,63,1,1 
+10,20240207_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT3_F24_4uL.23744.23744.4.0.dta,4,2174.125137,2,RKEEEHMIDWVEKHVVK(12)(13),2174.117417,null,216.982238,82.968900,9.269676e-037,1.000000e+000,0.007720,3.550866,2,0.000000,sp|Q9CQQ7|AT5F1_MOUSE (187)(188)/,0,144,1,1 +11,20240205_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT2_F16.18398.18398.3.0.dta,3,2952.490347,2,AAPAPAAAPAAAPAAAPEPERPKEAEFDASK(20)(23),2952.484874,null,109.558310,81.783900,3.031821e-036,1.000000e+000,0.005473,1.853693,2,0.000000,sp|P09542|MYL3_MOUSE (39)(42)/,0,121,1,1 +12,20240207_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT3_F24_4uL.20003.20003.3.0.dta,3,1662.887326,2,HVVFGHVKEGMDVVK(8)(9),1662.878380,null,222.355045,78.695900,6.649743e-035,1.000000e+000,0.008946,5.379828,2,0.000000,sp|Q99KR7|PPIF_MOUSE (145)(146)/,0,144,1,1 +13,20240205_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT3_F16.18547.18547.3.0.dta,3,2952.487542,2,AAPAPAAAPAAAPAAAPEPERPKEAEFDASK(20)(23),2952.484874,null,116.750042,77.717700,1.768609e-034,1.000000e+000,0.002668,0.903646,2,0.000000,sp|P09542|MYL3_MOUSE (39)(42)/,0,124,1,1 +14,20240205_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT3_F16.24114.24114.3.0.dta,3,1626.832691,2,IDPEKLSVNSHFMK(4)(5),1626.830764,null,145.915590,74.962900,2.779881e-033,1.000000e+000,0.001927,1.184512,2,0.000000,sp|Q9CR21|ACPM_MOUSE (28)(29)/,0,124,1,1 +15,20240205_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT2_F16.18669.18669.3.0.dta,3,2952.494587,2,AAPAPAAAPAAAPAAAPEPERPKEAEFDASK(23)(24),2952.484874,null,100.707043,74.939100,2.846836e-033,1.000000e+000,0.009713,3.289771,2,0.000000,sp|P09542|MYL3_MOUSE (42)(43)/,0,121,1,1 +16,20240206_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_KO3_F19.11768.11768.3.0.dta,3,2130.073631,2,RIKVEKPVVEMDGDEMTR(3)(5),2130.068083,Oxidation[M](11),127.846755,74.027400,7.084491e-033,1.000000e+000,0.005548,2.604612,2,0.000000,sp|P54071|IDHP_MOUSE (6)(8)/,0,63,1,1 
+17,20240207_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT3_F24_4uL.28725.28725.3.0.dta,3,1759.009257,2,GQDIKVPPPLPQFGRK(3)(5),1759.001266,null,134.622296,73.442100,1.272041e-032,1.000000e+000,0.007991,4.542919,2,0.000000,sp|Q6P8J7|KCRS_MOUSE (367)(369)/,0,144,1,1 +18,20240205_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT3_F16.19290.19290.3.0.dta,3,1642.831606,2,IDPEKLSVNSHFMK(2)(5),1642.825678,Oxidation[M](13),206.031048,73.386100,1.345308e-032,1.000000e+000,0.005928,3.608417,2,0.000000,sp|Q9CR21|ACPM_MOUSE (26)(29)/,0,124,1,1 +19,20240203_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT1_F13.9803.9803.3.0.dta,3,1608.899475,3,VGSPPLEK(7)-SAPAVQTK(1),1608.895471,null,164.404474,73.209800,1.604678e-032,1.000000e+000,0.004004,2.488664,2,0.000000,sp|Q99JY0|ECHB_MOUSE (382)-sp|Q99JY0|ECHB_MOUSE (1)/,1,105,1,1 +898,20240129_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT2_F1.30643.30643.2.0.dta,2,978.530764,1,GDQALSFLK(2),978.525427,null,233.969839,32.589600,7.022901e-015,1.000000e+000,0.005337,5.454125,2,0.000000,sp|P48962|ADT1_MOUSE (3)/,0,76,1,1 +932,20240129_EX3_UM5_perez044_SA_EXT00_MitoDMTMM_WT3_F2.12541.12541.3.0.dta,3,2382.956691,0,MTLSDPSEMDELMSEEAYEK,2382.950846,Oxidation[M](1);Oxidation[M](9);Oxidation[M](13),80.316758,32.287200,9.502704e-015,1.000000e+000,0.005845,2.452841,2,0.000000,sp|Q91WK5|GCSH_MOUSE /,0,80,1,1"; diff --git a/rustyms/src/peptide/peptidoform.rs b/rustyms/src/peptide/peptidoform.rs index 194d32f..0ff410a 100644 --- a/rustyms/src/peptide/peptidoform.rs +++ b/rustyms/src/peptide/peptidoform.rs @@ -10,7 +10,7 @@ use crate::{ Fragment, LinearPeptide, Model, MolecularCharge, MolecularFormula, Multi, SequencePosition, }; /// A single peptidoform, can contain multiple linear peptides -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Serialize, Deserialize, Hash)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Serialize, Deserialize, Hash)] pub struct Peptidoform(pub(crate) Vec>); impl Peptidoform { @@ -79,6 +79,11 @@ impl Peptidoform { &self.0 } + /// Get 
all peptides making up this peptidoform + pub fn peptides_mut(&mut self) -> &mut [LinearPeptide] { + &mut self.0 + } + /// Set the charge carriers #[allow(clippy::needless_pass_by_value)] pub fn set_charge_carriers(&mut self, charge_carriers: Option) {