Skip to content

Commit

Permalink
change the output format for gnps_annotations in metabolomics results…
Browse files Browse the repository at this point in the history
… file, improve docstrings
  • Loading branch information
liannette committed Oct 16, 2024
1 parent 32ca3dd commit 8e7945d
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 40 deletions.
26 changes: 13 additions & 13 deletions src/nplinker/genomics/bgc.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,22 +194,22 @@ def aa_predictions(self) -> list:
return [self._aa_predictions]

def to_dict(self) -> dict[str, any]:
"""Convert the BGC object to a dictionary that can be used to export the results.
"""Convert the BGC object to a dictionary for exporting results.
This method gathers relevant information from the BGC object and formats it into a dictionary
where each key-value pair represents a specific attribute of the BGC.
This method compiles relevant information from the BGC object and formats it into a dictionary.
Each key-value pair in the dictionary represents a specific attribute of the BGC.
Returns:
dict[str, str]: A dictionary containing relevant information about the BGC object, including:
- GCF_id: A comma-separated string of GCF IDs or "-" if none.
- GCF_bigscape_class: A comma-separated string of BiG-SCAPE classes or "-" if none.
- BGC_name: The name of the BGC.
- strain_id: The ID of the strain.
- description: A description of the BGC.
- antismash_id: The antiSMASH ID.
- antismash_region: The antiSMASH region.
- antismash_cluster_type: A comma-separated string of product predictions.
- mibig_bgc_class: The MiBIG BGC class or "-" if none.
A dictionary containing the following key-value pairs:
- GCF_id (str): A comma-separated string of GCF IDs or "-" if none are available.
- GCF_bigscape_class (str): A comma-separated string of BiG-SCAPE classes or "-" if none are available.
- BGC_name (str): The name of the BGC.
- strain_id (str): The ID of the strain.
- description (str): A description of the BGC.
- antismash_id (str): The antiSMASH ID.
- antismash_region (str): The antiSMASH region.
- antismash_cluster_type (str): A comma-separated string of product predictions.
- mibig_bgc_class (str): The MiBIG BGC class or "-" if none is available.
"""
gcf_ids = {gcf.id for gcf in self.parents if gcf.id is not None}
gcf_bsc = {bsc for bsc in self.bigscape_classes if bsc is not None}
Expand Down
30 changes: 12 additions & 18 deletions src/nplinker/metabolomics/spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,25 +98,21 @@ def has_strain(self, strain: Strain) -> bool:
"""
return strain in self.strains

def _formatted_gnps_annotations(self) -> str:
"""Format GNPS annotations dictionary into a string."""
return "; ".join(f"{k}: {v}" for k, v in self.gnps_annotations.items())

def to_dict(self) -> dict[str, any]:
"""Convert the Spectrum object to a dictionary that can be used to export the results.
"""Convert the Spectrum object to a dictionary for exporting results.
This method gathers relevant information from the Spectrum object and formats it into a dictionary
where each key-value pair represents a specific attribute of the Spectrum.
This method compiles relevant information from the Spectrum object into a dictionary format.
Each key-value pair in the dictionary represents a specific attribute of the Spectrum object.
Returns:
dict[str, str]: A dictionary containing relevant information about the Spectrum object, including:
- "spectrum_id": The unique identifier of the spectrum.
- "num_strains_with_spectrum": The number of strains associated with the spectrum.
- "precursor_mz": The precursor m/z value formatted to four decimal places.
- "rt": The retention time formatted to three decimal places.
- "molecular_family": The identifier of the molecular family, or "-" if not available.
- "gnps_id": The GNPS identifier, or "-" if not available.
- "gnps_annotations": A formatted string of GNPS annotations, or "-" if not available.
A dictionary containing the following key-value pairs:
- "spectrum_id" (str): The unique identifier of the spectrum.
- "num_strains_with_spectrum" (int): The number of strains associated with the spectrum.
- "precursor_mz" (float): The precursor m/z value, rounded to four decimal places.
- "rt" (float): The retention time, rounded to three decimal places.
- "molecular_family" (str): The identifier of the molecular family, or "-" if not available.
- "gnps_id" (str): The GNPS identifier, or "-" if not available.
- "gnps_annotations" (dict | str): A dictionary of GNPS annotations, or "-" if not available.
"""
return {
"spectrum_id": self.id,
Expand All @@ -125,7 +121,5 @@ def to_dict(self) -> dict[str, any]:
"rt": round(self.rt, 3),
"molecular_family": self.family.id if self.family else "-",
"gnps_id": self.gnps_id if self.gnps_id else "-",
"gnps_annotations": self._formatted_gnps_annotations()
if self.gnps_annotations
else "-",
"gnps_annotations": self.gnps_annotations if self.gnps_annotations else "-",
}
17 changes: 8 additions & 9 deletions src/nplinker/scoring/link_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,15 +299,14 @@ def get_table_data(self, display_limit: int | None = None) -> list[dict[str, any
table. If None, all rows are included.
Returns:
list: A list of dictionaries, where each dictionary contains
the following keys:
- Index (int)
- Genomic Object Type (str)
- Genomic Object ID (str or int)
- Metabolomic Object Type (str)
- Metabolomic Object ID (str or int)
- Metcalf Score (str, formatted to 2 decimal places, or "-")
- Rosetta Score (str, formatted to 2 decimal places, or "-")
A list of dictionaries, where each dictionary contains the following keys:
- index (int): The index of the link.
- genomic_object_type (str): The type of the genomic object.
- genomic_object_id (str or int): The ID of the genomic object.
- metabolomic_object_type (str): The type of the metabolomic object.
- metabolomic_object_id (str or int): The ID of the metabolomic object.
- metcalf_score (str): The Metcalf score, formatted to 2 decimal places, or "-".
- rosetta_score (str): The Rosetta score, formatted to 2 decimal places, or "-".
"""
genomic_object_classes = (GCF,)

Expand Down

0 comments on commit 8e7945d

Please sign in to comment.