Skip to content

Commit

Permalink
Merge pull request #18 from BSC-CNS-EAPM/mn5-glidedocking
Browse files Browse the repository at this point in the history
Mn5 glidedocking
  • Loading branch information
AlbertCS authored May 31, 2024
2 parents fd47612 + d07c1d8 commit 430cd12
Show file tree
Hide file tree
Showing 38 changed files with 1,061 additions and 1,533 deletions.
16 changes: 8 additions & 8 deletions EAPM/EAPM.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,21 +95,21 @@ def createPlugin():

eapmPlugin.addBlock(epPredBlock)

from Blocks.ClassificationBioMl import classificationBioMLBlock # type: ignore
# from Blocks.testBlock import testBlock # type: ignore

eapmPlugin.addBlock(classificationBioMLBlock)
# eapmPlugin.addBlock(testBlock)

from Blocks.RegressionBioMl import regressionBioMLBlock # type: ignore
from Blocks.AnalyseGlide import AnalyseGBlock # type: ignore

eapmPlugin.addBlock(regressionBioMLBlock)
eapmPlugin.addBlock(AnalyseGBlock)

from Blocks.PredictBioML import PredictBioMLBlock # type: ignore
# from Blocks.Rbcavity import rbCavityBlock # type: ignore

eapmPlugin.addBlock(PredictBioMLBlock)
# eapmPlugin.addBlock(rbCavityBlock)

from Blocks.outliersBioMl import outliersBioMLBlock # type: ignore
# from Blocks.Rbdock import rbDockBlock # type: ignore

eapmPlugin.addBlock(outliersBioMLBlock)
# eapmPlugin.addBlock(rbDockBlock)

# Add the configs
from Configs.mafftConfig import mafftExecutableConfig # type: ignore
Expand Down
17 changes: 12 additions & 5 deletions EAPM/Include/Blocks/Ahatool.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
import datetime
import os
import subprocess

from HorusAPI import PluginBlock, PluginVariable, VariableList, VariableTypes

# TODO Add to the documentation
Expand Down Expand Up @@ -46,6 +42,13 @@
##############################
# Other variables #
##############################
removeExistingResults = PluginVariable(
name="Remove existing results",
id="remove_existing_results",
description="Remove existing results",
type=VariableTypes.BOOLEAN,
defaultValue=False,
)
prefixVar = PluginVariable(
name="Prefix",
id="prefix",
Expand Down Expand Up @@ -78,6 +81,10 @@

def initialAction(block: PluginBlock):

import datetime
import os
import subprocess

container_name = block.inputs.get("container_name", "bsceapm/ahatool:2.2")
input_fasta = block.inputs.get("input_fasta", None)
fasta = os.path.basename(input_fasta)
Expand Down Expand Up @@ -147,6 +154,6 @@ def initialAction(block: PluginBlock):
action=initialAction,
description="Iteratively search a protein sequence against a protein database",
inputs=[inputFasta, dbPath, containerName],
variables=[prefixVar, startVar, evalVar, threadsVar],
variables=[removeExistingResults, prefixVar, startVar, evalVar, threadsVar],
outputs=[outputAhatool],
)
18 changes: 11 additions & 7 deletions EAPM/Include/Blocks/AlignPdbEAPM.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,26 +124,30 @@ def initialAlign(block: PluginBlock):
alignmentMode = block.variables.get("alignment_mode", "aligned")
referenceResidues = block.variables.get("reference_residues", [])

import prepare_proteins

print("Loading PDB files...")

models = prepare_proteins.proteinModels(inputFolder)

# Parse the chain indexes
if chainIndexes is not None:
chainIndexes = [x["chain_index"] for x in chainIndexes]
else:
chainIndexes = [0]

trajectory_chain_indexes = None
# Parse the trajectory chain indexes
if trajectoryChainIndexes is not None:
trajectoryChainIndexes = [x["trajectory_chain_index"] for x in trajectoryChainIndexes]
trajectory_chain_indexes = {}
for i, model in enumerate(models.models_names):
trajectory_chain_indexes[model] = trajectoryChainIndexes[i]

# Parse the reference residues
if referenceResidues is not None:
referenceResidues = [x["reference_residues"] for x in referenceResidues]

import prepare_proteins

print("Loading PDB files...")

models = prepare_proteins.proteinModels(inputFolder)

print("Aligning models...")

import subprocess
Expand All @@ -163,7 +167,7 @@ def hookSubprocessMafft(command, **kwargs):
pdbReference,
outputFolder,
chain_indexes=chainIndexes,
trajectory_chain_indexes=trajectoryChainIndexes,
trajectory_chain_indexes=trajectory_chain_indexes,
aligment_mode=alignmentMode,
reference_residues=referenceResidues,
verbose=True,
Expand Down
18 changes: 15 additions & 3 deletions EAPM/Include/Blocks/AlphaFoldEAPM.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
Module containing the AlphaFold block for the EAPM plugin
"""

import os

from HorusAPI import PluginVariable, SlurmBlock, VariableTypes

# ==========================#
Expand Down Expand Up @@ -62,6 +60,18 @@ def initialAlphafold(block: SlurmBlock):
folderName = block.variables.get("folder_name", "alphafold")
removeExisting = block.variables.get("remove_existing_results", False)

cpus_per_task = block.variables.get("cpus_per_task")
if cpus_per_task is 1:
print("Alphafold requires at least 20 cpus per task. Changing to 20 cpus per task.")
block.variables["cpus_per_task"] = 20

partiton = block.variables.get("partition")
if partiton is None:
print("Alphafold requires an accelerated partition. Changing to acc_bscls.")
block.variables["partition"] = "acc_bscls"

import os

# If folder already exists, raise exception
if removeExisting and os.path.exists(folderName):
os.system("rm -rf " + folderName)
Expand Down Expand Up @@ -97,6 +107,8 @@ def finalAlhafoldAction(block: SlurmBlock):

resultsFolder = block.extraData["folder_name"]

import os

output_models_folder = os.path.join(downloaded_path, resultsFolder, "output_models")

block.setOutput(outputModelsVariable.id, output_models_folder)
Expand All @@ -106,7 +118,7 @@ def finalAlhafoldAction(block: SlurmBlock):

alphafoldBlock = SlurmBlock(
name="Alphafold",
description="Run Alphafold. (For cte_power, marenostrum, nord3 and minotauro clusters or local)",
description="Run Alphafold. (For marenostrum, nord3 clusters or local)",
initialAction=initialAlphafold,
finalAction=finalAlhafoldAction,
variables=BSC_JOB_VARIABLES + [outputAF, removeExistingResults],
Expand Down
230 changes: 230 additions & 0 deletions EAPM/Include/Blocks/AnalyseGlide.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
from HorusAPI import PluginBlock, PluginVariable, VariableGroup, VariableTypes

# TODO Configure the inputs correctly

# ==========================#
# Variable inputs
# ==========================#
# Output of the upstream BSC calculations block (restricted to "bsc_results").
glideOutputVariable = PluginVariable(
    name="Glide output",
    id="glide_output",
    type=VariableTypes.CUSTOM,
    allowedValues=["bsc_results"],
    description="Glide output from the BSC calculations block",
)

# Conserved residue index(es); no default, so the action must validate it.
conservedResidues = PluginVariable(
    id="conserved_indexes",
    name="Conserved residues",
    type=VariableTypes.CUSTOM,
    defaultValue=None,
    description="The conserved residues",
)

# --- Atom-based selection: the two ends of the measured distance ---
residueProtein = PluginVariable(
    id="resi_id1",
    name="Atom Protein",
    type=VariableTypes.ATOM,
    description="Atom of the protein to calculate the distance to",
)
residueLigand = PluginVariable(
    id="resi_id2",
    name="Atom Ligand",
    type=VariableTypes.ATOM,
    description="Atom of the ligand to calculate the distance to",
)

# --- String-based selection: the same information typed in by name ---
resNameProt = PluginVariable(
    id="res_name_prot",
    name="Protein residue name",
    type=VariableTypes.STRING,
    defaultValue="CYS",
    description="The protein residue name",
)
atomNameProt = PluginVariable(
    id="atom_name_prot",
    name="Protein atomname",
    type=VariableTypes.STRING,
    defaultValue="SG",
    description="The protein atom name",
)
ligandName = PluginVariable(
    id="ligand_name",
    name="Ligand name",
    type=VariableTypes.STRING,
    defaultValue="GSH",
    description="The ligand name",
)
atomNameLig = PluginVariable(
    id="atom_name_ligand",
    name="Ligand atom name",
    type=VariableTypes.STRING,
    defaultValue="S1",
    description="The atom name of the ligand",
)

# Input group where the protein/ligand atoms are described with plain strings.
stringGroup = VariableGroup(
    name="Input String",
    id="string_input",
    variables=[
        conservedResidues,
        glideOutputVariable,
        resNameProt,
        atomNameProt,
        ligandName,
        atomNameLig,
    ],
    description="The input are in string",
)

# Input group where the protein/ligand atoms are given as ATOM variables.
atomGroup = VariableGroup(
    name="Input Atom",
    id="atom_input",
    variables=[
        conservedResidues,
        glideOutputVariable,
        residueProtein,
        residueLigand,
    ],
    description="The input are in atom",
)

# Output variables
# Folder output that receives the extracted best docking poses.
outputModelsVariable = PluginVariable(
    name="Best poses",
    id="best_poses",
    type=VariableTypes.FOLDER,
    description="The best poses from the analysis",
)

# Custom output carrying the analysis summary (restricted to "glide_output").
analyseGlideOutputVariable = PluginVariable(
    name="Glide results output",
    id="glide_results_output",
    type=VariableTypes.CUSTOM,
    allowedValues=["glide_output"],
    description="Output results of the Glide analysis",
)

# ==========================#
# Variable
# ==========================#
# Label under which the docking distances are stored as a metric.
metricsVar = PluginVariable(
    id="metrics",
    name="Metrics ",
    type=VariableTypes.STRING,
    defaultValue="SG_S",
    description="The metrics list",
)

# Forwarded as ``remove_previous`` when the best poses are extracted.
removePreviousVar = PluginVariable(
    id="remove_previous",
    name="Remove previous models",
    type=VariableTypes.BOOLEAN,
    defaultValue=False,
    description="Remove previous",
)

# Forwarded as ``separator`` to the docking analysis and pose extraction.
separatorVar = PluginVariable(
    id="separator",
    name="Separator",
    type=VariableTypes.STRING,
    defaultValue="@",
    description="The separator",
)


def _conserved_indexes_by_model(conserved_indexes, model_names):
    """Return the conserved residue indexes as a ``{model: [index, ...]}`` dict.

    A dictionary is returned unchanged. Any other value is converted with
    ``int()`` and that single index is assigned to every name in
    ``model_names``.

    Raises:
        ValueError: If nothing was provided, or the value is neither a
            dictionary nor convertible to an integer.
    """
    if conserved_indexes is None:
        raise ValueError("Conserved residues must be provided")
    if isinstance(conserved_indexes, dict):
        return conserved_indexes
    try:
        index = int(conserved_indexes)
    except (TypeError, ValueError) as exc:
        # TypeError covers lists/other objects that int() rejects outright.
        raise ValueError(
            "Conserved indexes must be an integer or a dictionary of integers"
        ) from exc
    return {model: [index] for model in model_names}


def _find_center_atoms(models, conserved_indexes, res_name_prot, atom_name_prot):
    """Locate, for every model, the protein atom used as the distance anchor.

    For each model the first residue whose number is listed in
    ``conserved_indexes[model]`` and whose residue name equals
    ``res_name_prot`` is selected.

    Returns:
        dict: ``{model: (chain_id, residue_number, atom_name_prot)}``.

    Raises:
        ValueError: If a model has no residue matching both conditions.
    """
    center_atom = {}
    for model in models:
        wanted = conserved_indexes[model]
        for residue in models.structures[model].get_residues():
            if residue.id[1] in wanted and residue.resname == res_name_prot:
                center_atom[model] = (
                    residue.get_parent().id,
                    residue.id[1],
                    atom_name_prot,
                )
                break
        else:
            # Fail here with context instead of a bare KeyError further down.
            raise ValueError(
                f"No {res_name_prot} residue found at conserved index(es) "
                f"{wanted} in model {model}"
            )
    return center_atom


def finalAction(block: PluginBlock):
    """Analyse Glide docking results and extract the best poses.

    Reads the Glide output (a mapping with ``dock_folder`` and
    ``model_folder`` keys) and the conserved residue index(es) from the block
    inputs, builds one protein-atom/ligand-atom pair per model, runs the
    ``prepare_proteins`` docking analysis on it and extracts the best poses
    into the ``best_docking_poses`` folder.

    Outputs set on the block:
        * ``best_poses``: the ``best_docking_poses`` folder.
        * ``glide_results_output``: dict with ``poses_folder``,
          ``models_folder`` and ``atom_pairs``; the same dict is also pickled
          to ``glide_output.pkl`` in the current working directory.

    Raises:
        ValueError: If the Glide output, the conserved residues or (for the
            atom input group) the selected atoms are missing, if the conserved
            indexes have an unsupported type, or if a model has no matching
            conserved residue.
    """
    import pickle

    import prepare_proteins

    bsc_result = block.inputs.get(glideOutputVariable.id, None)
    if bsc_result is None:
        raise ValueError("Glide output must be provided")
    folder_to_analyse = bsc_result["dock_folder"]
    model_folder = bsc_result["model_folder"]

    conserved_indexes = block.inputs.get(conservedResidues.id, None)

    metrics = block.variables.get("metrics", "SG_S")
    remove_previous = block.variables.get("remove_previous", False)
    separator = block.variables.get("separator", "@")

    if block.selectedInputGroup == stringGroup.id:
        res_name_prot = block.inputs.get(resNameProt.id, "CYS")
        atom_name_prot = block.inputs.get(atomNameProt.id, "SG")
        ligand_name = block.inputs.get(ligandName.id, "GSH")
        atom_name_lig = block.inputs.get(atomNameLig.id, "S1")
    else:
        residue_protein = block.inputs.get(residueProtein.id, None)
        residue_ligand = block.inputs.get(residueLigand.id, None)
        if residue_protein is None or residue_ligand is None:
            raise ValueError("Protein and ligand atoms must be provided")
        res_name_prot = residue_protein["auth_comp_id"]
        atom_name_prot = residue_protein["auth_atom_id"]
        ligand_name = residue_ligand["auth_comp_id"]
        atom_name_lig = residue_ligand["auth_atom_id"]
        # With atom inputs the metric label is derived from the two atom names.
        # NOTE(review): assumed to apply to this branch only — confirm.
        metrics = f"{atom_name_prot}_{atom_name_lig}"

    models = prepare_proteins.proteinModels(model_folder)

    conserved_indexes = _conserved_indexes_by_model(conserved_indexes, models)

    # Atom 3-element tuple (chain, residue number, atom name) for each model.
    center_atom = _find_center_atoms(
        models, conserved_indexes, res_name_prot, atom_name_prot
    )

    # One (protein atom, ligand atom) pair per model for the selected ligand.
    atom_pairs = {
        model: {ligand_name: [(center_atom[model], atom_name_lig)]}
        for model in models
    }

    models.analyseDocking(folder_to_analyse, atom_pairs=atom_pairs, separator=separator)

    # metric -> model -> ligand -> docking distances
    metric_distances = {
        metrics: {
            model: {
                ligand: models.getDockingDistances(model, ligand)
                for ligand in models.docking_ligands[model]
            }
            for model in models
        }
    }

    models.combineDockingDistancesIntoMetrics(metric_distances)

    best_poses = models.getBestDockingPosesIteratively(metric_distances)

    models.extractDockingPoses(
        best_poses,
        folder_to_analyse,
        "best_docking_poses",
        separator=separator,
        remove_previous=remove_previous,
    )

    block.setOutput(outputModelsVariable.id, "best_docking_poses")

    glideOutput = {
        "poses_folder": "best_docking_poses",
        "models_folder": model_folder,
        "atom_pairs": atom_pairs,
    }

    with open("glide_output.pkl", "wb") as f:
        pickle.dump(glideOutput, f)

    block.setOutput(analyseGlideOutputVariable.id, glideOutput)


# Block registration: wires the action to its variables, the two alternative
# input groups (atom-based or string-based) and the two outputs.
AnalyseGBlock = PluginBlock(
    name="Analyse Glide",
    description="To analyse Glide results",
    inputGroups=[atomGroup, stringGroup],
    variables=[metricsVar, removePreviousVar, separatorVar],
    outputs=[outputModelsVariable, analyseGlideOutputVariable],
    action=finalAction,
)
Loading

0 comments on commit 430cd12

Please sign in to comment.