Skip to content

Commit

Permalink
Adjusted connections due to new db_access set up #126
Browse files Browse the repository at this point in the history
  • Loading branch information
GwennyGit committed Aug 21, 2024
1 parent cac6ad6 commit 4df40b4
Show file tree
Hide file tree
Showing 16 changed files with 299 additions and 311 deletions.
11 changes: 11 additions & 0 deletions docs/source/modules/utility.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@ databases module
:private-members:
:special-members:

db\_access module
-----------------

.. automodule:: refinegems.utility.db_access
:members:
:undoc-members:
:show-inheritance:
:private-members:
:special-members:


entities module
---------------

Expand Down
2 changes: 1 addition & 1 deletion src/refinegems/analysis/growth.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import yaml

from cobra import Model as cobraModel
from ..utility.entities import test_biomass_presence
from ..utility.util import test_biomass_presence
from ..utility.io import load_model, load_a_table_from_database
from ..classes.reports import SingleGrowthSimulationReport, GrowthSimulationReport, AuxotrophySimulationReport, SourceTestReport
from ..classes.medium import Medium, medium_to_model, read_from_cobra_model, load_medium_from_db, read_external_medium
Expand Down
2 changes: 1 addition & 1 deletion src/refinegems/analysis/investigate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from memote.support import consistency_helpers as con_helpers

from ..utility.io import search_sbo_label
from ..utility.entities import test_biomass_presence
from ..utility.util import test_biomass_presence

################################################################################
# variables
Expand Down
2 changes: 1 addition & 1 deletion src/refinegems/classes/egcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from ..analysis.growth import MIN_GROWTH_THRESHOLD, set_bounds_to_default
from .medium import Medium
from ..utility.entities import test_biomass_presence
from ..utility.util import test_biomass_presence

import cobra
import pandas as pd
Expand Down
2 changes: 1 addition & 1 deletion src/refinegems/classes/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from typing import Literal,Union

from ..analysis.investigate import get_mass_charge_unbalanced, get_orphans_deadends_disconnected, get_reac_with_gpr
from ..utility.entities import test_biomass_presence
from ..utility.util import test_biomass_presence
from ..developement.decorators import *

################################################################################
Expand Down
4 changes: 1 addition & 3 deletions src/refinegems/curation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
__all__ = ['db_access','biomass','charges','curate','pathways','polish']

from . import db_access
__all__ = ['biomass','charges','curate','pathways','polish']

from . import biomass
from . import charges
Expand Down
2 changes: 1 addition & 1 deletion src/refinegems/curation/biomass.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from six import iteritems
from typing import Union

from ..utility.entities import test_biomass_consistency, test_biomass_presence
from ..utility.util import test_biomass_consistency, test_biomass_presence

############################################################################
# variables
Expand Down
2 changes: 1 addition & 1 deletion src/refinegems/curation/charges.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

from libsbml import Model as libModel

from .db_access.modelseed import get_modelseed_compounds
from ..utility.db_access import get_modelseed_compounds

############################################################################
# functions
Expand Down
3 changes: 2 additions & 1 deletion src/refinegems/curation/curate.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
from typing import Literal

from ..utility.cvterms import add_cv_term_reactions, add_cv_term_metabolites, metabol_db_dict, get_id_from_cv_term
from ..utility.entities import create_gpr_from_locus_tag, create_reaction,test_biomass_presence
from ..utility.entities import create_gpr_from_locus_tag, create_reaction
from ..utility.util import test_biomass_presence

################################################################################
# variables
Expand Down
3 changes: 2 additions & 1 deletion src/refinegems/curation/polish.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
from typing import Union

from ..utility.cvterms import add_cv_term_units, add_cv_term_metabolites, add_cv_term_reactions, add_cv_term_genes, generate_cvterm, metabol_db_dict, reaction_db_dict, MIRIAM, OLD_MIRIAM
from ..utility.io import search_ncbi_for_gpr, parse_gff_for_refseq_info, parse_fasta_headers, parse_dict_to_dataframe, load_a_table_from_database
from ..utility.db_access import search_ncbi_for_gpr
from ..utility.io import parse_gff_for_refseq_info, parse_fasta_headers, parse_dict_to_dataframe, load_a_table_from_database

################################################################################
# variables
Expand Down
3 changes: 2 additions & 1 deletion src/refinegems/utility/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
__all__ = ['connections', 'cvterms','databases','entities','io','set_up','util']
__all__ = ['connections', 'cvterms', 'databases', 'db_access', 'entities','io','set_up','util']

from . import connections
from . import cvterms
from . import databases
from . import db_access
from . import entities
from . import io
from . import set_up
Expand Down
100 changes: 92 additions & 8 deletions src/refinegems/utility/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
import cobra
import json
import memote
import pandas as pd
import shutil
import subprocess
import tempfile
import time
import warnings
Expand All @@ -33,7 +35,7 @@
# needed by memote.support.consistency
from memote.support import consistency_helpers as con_helpers

from .entities import test_biomass_presence
from .util import test_biomass_presence
from .io import write_model_to_file

# note:
Expand All @@ -45,13 +47,6 @@
# variables
################################################################################

# database urls
# -------------

BIGG_REACTIONS_URL = 'http://bigg.ucsd.edu/api/v2/universal/reactions/' #: :meta:
BIGG_METABOLITES_URL = 'http://bigg.ucsd.edu/api/v2/universal/metabolites/' #: :meta:


################################################################################
# functions
################################################################################
Expand Down Expand Up @@ -147,6 +142,94 @@ def adjust_BOF(genome:str, model_file:str, model:cobra.Model, dna_weight_fractio
return new_objective


# DIAMOND
# -------

# @ISSUE / @NOTE / @TODO
# is this the right place to put these functions?
def run_DIAMOND_blastp(fasta:str, db:str,
                       sensitivity:Literal['sensitive', 'more-sensitive', 'very-sensitive','ultra-sensitive']='more-sensitive',
                       coverage:float=95.0,
                       threads:int=2,
                       outdir:str=None, outname:str='DIAMOND_blastp_res.tsv') -> Path:
    """Run DIAMOND in BLASTp mode.

    The result table is written in tabular format (``--outfmt 6``) with the
    columns qseqid, sseqid, pident, length, mismatch, gapopen, qstart, qend,
    sstart, send, evalue and bitscore. Everything DIAMOND writes to stderr is
    redirected into the file ``log_DIAMOND_blastp.txt`` next to the result file.

    Args:
        - fasta (str):
            The FASTA file to BLAST for.
        - db (str):
            The DIAMOND database file to BLAST against.
        - sensitivity (Literal['sensitive', 'more-sensitive', 'very-sensitive','ultra-sensitive'], optional):
            Sensitivity mode for DIAMOND.
            Defaults to 'more-sensitive'.
        - coverage (float, optional):
            A parameter for DIAMOND.
            Coverage threshold for the hits.
            Defaults to 95.0.
        - threads (int, optional):
            A parameter for DIAMOND.
            Number of threads to be used while BLASTing.
            Defaults to 2.
        - outdir (str, optional):
            Path to a directory to write the output files to.
            If None, the files are written relative to the current working directory.
            Defaults to None.
        - outname (str, optional):
            Name of the result file (name only, not a path).
            Defaults to 'DIAMOND_blastp_res.tsv'.

    Raises:
        - FileNotFoundError: The ``diamond`` executable cannot be found.

    Returns:
        Path:
            Path to the results of the DIAMOND BLASTp run.
    """

    # honour the requested file name with and without an output directory
    if outdir:
        outfile = Path(outdir, outname)
        logfile = Path(outdir, 'log_DIAMOND_blastp.txt')
    else:
        outfile = Path(outname)
        logfile = Path('log_DIAMOND_blastp.txt')

    # build the call as an argument list: no shell is involved, so paths that
    # contain spaces or shell metacharacters are passed to DIAMOND verbatim
    command = ['diamond', 'blastp',
               '-d', str(db),
               '-q', str(fasta),
               f'--{sensitivity}',
               '--query-cover', str(coverage),
               '-p', str(int(threads)),
               '-o', str(outfile),
               '--outfmt', '6',
               'qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen',
               'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']

    # the exit status is deliberately not checked (same as before);
    # diagnostics end up in the log file
    with open(logfile, 'w') as log:
        subprocess.run(command, stderr=log)

    return outfile

def filter_DIAMOND_blastp_results(blasttsv:str, pid_theshold:float=90.0) -> pd.DataFrame:
    """Filter the results of a DIAMOND BLASTp run (see
    :py:func:`~refinegems.utility.connections.run_DIAMOND_blastp`)
    by percentage identity value (PID) and extract the matching pairs of query
    and subject IDs.

    Args:
        - blasttsv (str):
            Path to the DIAMOND BLASTp result file.
        - pid_theshold (float, optional):
            Threshold value for the PID. Given in percent.
            Defaults to 90.0.

    Raises:
        - ValueError: PID threshold has to be between 0.0 and 100.0

    Returns:
        pd.DataFrame:
            A table with the columns query_ID and subject_ID containing hits from
            the BLAST run with a PID greater than or equal to the given threshold value.
            The table is empty if the result file is empty.
    """

    # the chained comparison also rejects NaN, which would slip through
    # two separate > / < checks
    if not 0.0 <= pid_theshold <= 100.0:
        raise ValueError('PID threshold has to be between 0.0 and 100.0')

    kept_columns = ['query_ID', 'subject_ID']

    # load diamond results
    try:
        diamond_results = pd.read_csv(blasttsv, sep='\t', header=None)
    except pd.errors.EmptyDataError:
        # an empty result file (e.g. a run that produced no hits) is a valid
        # outcome, not an error: report "no pairs" instead of crashing
        return pd.DataFrame(columns=kept_columns)
    diamond_results.columns = ['query_ID', 'subject_ID', 'PID', 'align_len', 'no_mismatch', 'no_gapopen', 'query_start', 'query_end', 'subject_start', 'subject_end','E-value','bitscore']

    # filter by PID and trim to the ID columns in one step
    passed = diamond_results['PID'] >= pid_theshold
    return diamond_results.loc[passed, kept_columns]


# MCC - MassChargeCuration
# ------------------------
Expand Down Expand Up @@ -308,3 +391,4 @@ def run_SBOannotator(model: libModel) -> libModel:
model = sbo_annotator(doc,model,'constrained-based',str(Path(tempdir,'dbs')),str(Path(tempdir,'dud.xml')))
return model


28 changes: 28 additions & 0 deletions src/refinegems/utility/cvterms.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# requirements
################################################################################

import cobra
import logging
from libsbml import BIOLOGICAL_QUALIFIER, BQB_IS, BQB_OCCURS_IN, BQB_IS_HOMOLOG_TO, MODEL_QUALIFIER, BQM_IS_DESCRIBED_BY, Unit, CVTerm, Species, Reaction, GeneProduct, Group, SBase

Expand Down Expand Up @@ -80,6 +81,33 @@
# functions
################################################################################

# cobra
# -----

def _add_annotations_from_dict_cobra(references:dict, entity:'cobra.Reaction|cobra.Metabolite|cobra.Model') -> None:
    """Given a dictionary and a cobra object, add the former as annotations to the latter.
    The keys of the dictionary are used as the annotation labels, the values as the values.
    If the keys are already in the entity, the values will be combined (union).

    The resulting annotation value is always a list without duplicates that keeps
    the already annotated IDs first, followed by the new ones in the given order.

    Args:
        - references (dict):
            The dictionary with the references to add to the entity.
            Values can be a single ID or a list of IDs.
        - entity (cobra.Reaction | cobra.Metabolite | cobra.Model):
            The entity to add annotations to. Its ``annotation`` dictionary
            is modified in place.
    """
    # NOTE: the type hint is quoted so that the ``|`` union is not evaluated
    # when the function is defined (needs Python >= 3.10 otherwise).

    # add additional references from the parameter
    for db, idlist in references.items():
        new_ids = idlist if isinstance(idlist, list) else [idlist]

        if db in entity.annotation:
            present = entity.annotation[db]
            # an existing annotation may be a bare string instead of a list;
            # wrap it, otherwise the concatenation below raises a TypeError
            present = [present] if isinstance(present, str) else list(present)
        else:
            present = []

        # dict.fromkeys removes duplicates but - unlike set() - keeps the order;
        # building a fresh list also avoids aliasing the caller's list
        entity.annotation[db] = list(dict.fromkeys(present + new_ids))



# libsbml
# -------

def add_cv_term_units(unit_id: str, unit: Unit, relation: int):
"""Adds CVTerm to a unit
Expand Down
1 change: 1 addition & 0 deletions src/refinegems/utility/databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ def get_database_links_info_per_row(row:pd.Series):
key, value = link.split(':',1)
key = key.strip()
value = value.rsplit('/',1)[1].strip()
value = value.removeprefix('META:') # @TODO: Make case insensitive!
if key in database_ids.keys():
database_ids[key].append(value)
else:
Expand Down
Loading

0 comments on commit 4df40b4

Please sign in to comment.