diff --git a/EAPM/EAPM.py b/EAPM/EAPM.py index fa79928..d6f6c1c 100644 --- a/EAPM/EAPM.py +++ b/EAPM/EAPM.py @@ -91,6 +91,26 @@ def createPlugin(): eapmPlugin.addBlock(ahatoolBlock) + from Blocks.EpPred import epPredBlock # type: ignore + + eapmPlugin.addBlock(epPredBlock) + + # from Blocks.testBlock import testBlock # type: ignore + + # eapmPlugin.addBlock(testBlock) + + from Blocks.AnalyseGlide import AnalyseGBlock # type: ignore + + eapmPlugin.addBlock(AnalyseGBlock) + + # from Blocks.Rbcavity import rbCavityBlock # type: ignore + + # eapmPlugin.addBlock(rbCavityBlock) + + # from Blocks.Rbdock import rbDockBlock # type: ignore + + # eapmPlugin.addBlock(rbDockBlock) + # Add the configs from Configs.mafftConfig import mafftExecutableConfig # type: ignore diff --git a/EAPM/Include/Blocks/Ahatool.py b/EAPM/Include/Blocks/Ahatool.py index 14618eb..e1865e3 100644 --- a/EAPM/Include/Blocks/Ahatool.py +++ b/EAPM/Include/Blocks/Ahatool.py @@ -1,9 +1,7 @@ -import datetime -import os -import subprocess - from HorusAPI import PluginBlock, PluginVariable, VariableList, VariableTypes +# TODO Add to the documentation + # ==========================# # Variable inputs # ==========================# @@ -44,6 +42,13 @@ ############################## # Other variables # ############################## +removeExistingResults = PluginVariable( + name="Remove existing results", + id="remove_existing_results", + description="Remove existing results", + type=VariableTypes.BOOLEAN, + defaultValue=False, +) prefixVar = PluginVariable( name="Prefix", id="prefix", @@ -76,6 +81,10 @@ def initialAction(block: PluginBlock): + import datetime + import os + import subprocess + container_name = block.inputs.get("container_name", "bsceapm/ahatool:2.2") input_fasta = block.inputs.get("input_fasta", None) fasta = os.path.basename(input_fasta) @@ -145,6 +154,6 @@ def initialAction(block: PluginBlock): action=initialAction, description="Iteratively search a protein sequence against a protein database", inputs=[inputFasta, dbPath, containerName], - variables=[prefixVar, startVar, evalVar, threadsVar], + variables=[removeExistingResults, prefixVar, startVar, evalVar, threadsVar], outputs=[outputAhatool], ) diff --git a/EAPM/Include/Blocks/AlignPdbEAPM.py b/EAPM/Include/Blocks/AlignPdbEAPM.py index 989d4ae..d3ef54a 100644 --- a/EAPM/Include/Blocks/AlignPdbEAPM.py +++ b/EAPM/Include/Blocks/AlignPdbEAPM.py @@ -45,7 +45,6 @@ type=VariableTypes.INTEGER, defaultValue=0, ) - chainIndexesAlign = VariableList( name="Chain indexes", id="chain_indexes", @@ -63,15 +62,12 @@ type=VariableTypes.INTEGER, defaultValue=0, ) - trajectoryChainIndexesAlign = VariableList( name="Trajectory chain indexes", id="trajectory_chain_indexes", description="Chain indexes of the target trajectories to use in the alignment.", prototypes=[trajectoryChainIndexVariable], ) - - alignmentModeAlign = PluginVariable( name="Alignment mode", id="alignment_mode", @@ -80,7 +76,6 @@ defaultValue="aligned", allowedValues=["aligned", "exact"], ) - referenceResiduesAlign = PluginVariable( name="Reference residue index", id="reference_residues", @@ -129,26 +124,30 @@ def initialAlign(block: PluginBlock): alignmentMode = block.variables.get("alignment_mode", "aligned") referenceResidues = block.variables.get("reference_residues", []) + import prepare_proteins + + print("Loading PDB files...") + + models = prepare_proteins.proteinModels(inputFolder) + # Parse the chain indexes if chainIndexes is not None: chainIndexes = [x["chain_index"] for x in chainIndexes] else: chainIndexes = [0] + trajectory_chain_indexes = None # Parse the trajectory chain indexes if trajectoryChainIndexes is not None: trajectoryChainIndexes = [x["trajectory_chain_index"] for x in trajectoryChainIndexes] + trajectory_chain_indexes = {} + for i, model in enumerate(models.models_names): + trajectory_chain_indexes[model] = trajectoryChainIndexes[i] # Parse the reference residues if referenceResidues is not None: referenceResidues = [x["reference_residues"] for x in referenceResidues] - import prepare_proteins - - print("Loading PDB files...") - - models = prepare_proteins.proteinModels(inputFolder) - print("Aligning models...") import subprocess @@ -168,7 +167,7 @@ def hookSubprocessMafft(command, **kwargs): pdbReference, outputFolder, chain_indexes=chainIndexes, - trajectory_chain_indexes=trajectoryChainIndexes, + trajectory_chain_indexes=trajectory_chain_indexes, aligment_mode=alignmentMode, reference_residues=referenceResidues, verbose=True, diff --git a/EAPM/Include/Blocks/AlphaFoldEAPM.py b/EAPM/Include/Blocks/AlphaFoldEAPM.py index ce1a7de..6c14350 100644 --- a/EAPM/Include/Blocks/AlphaFoldEAPM.py +++ b/EAPM/Include/Blocks/AlphaFoldEAPM.py @@ -2,8 +2,6 @@ Module containing the AlphaFold block for the EAPM plugin """ -import os - from HorusAPI import PluginVariable, SlurmBlock, VariableTypes # ==========================# @@ -28,7 +26,6 @@ type=VariableTypes.STRING, defaultValue="alphafold", ) - removeExistingResults = PluginVariable( name="Remove existing results", id="remove_existing_results", @@ -63,13 +60,25 @@ def initialAlphafold(block: SlurmBlock): folderName = block.variables.get("folder_name", "alphafold") removeExisting = block.variables.get("remove_existing_results", False) + cpus_per_task = block.variables.get("cpus_per_task") + if cpus_per_task is 1: + print("Alphafold requires at least 20 cpus per task. Changing to 20 cpus per task.") + block.variables["cpus_per_task"] = 20 + + partiton = block.variables.get("partition") + if partiton is None: + print("Alphafold requires an accelerated partition. Changing to acc_bscls.") + block.variables["partition"] = "acc_bscls" + + import os + # If folder already exists, raise exception if removeExisting and os.path.exists(folderName): os.system("rm -rf " + folderName) if not removeExisting and os.path.exists(folderName): raise Exception( - "The folder {} already exists. Please, choose another name or remove it.".format( + "The folder {} already exists. Please, choose another name or remove it with the remove existing folder option.".format( folderName ) ) @@ -88,7 +97,7 @@ def initialAlphafold(block: SlurmBlock): from utils import launchCalculationAction - launchCalculationAction(block, jobs, folderName) + launchCalculationAction(block, jobs, "alphafold", [folderName]) def finalAlhafoldAction(block: SlurmBlock): @@ -98,6 +107,8 @@ def finalAlhafoldAction(block: SlurmBlock): resultsFolder = block.extraData["folder_name"] + import os + output_models_folder = os.path.join(downloaded_path, resultsFolder, "output_models") block.setOutput(outputModelsVariable.id, output_models_folder) @@ -107,7 +118,7 @@ def finalAlhafoldAction(block: SlurmBlock): alphafoldBlock = SlurmBlock( name="Alphafold", - description="Run Alphafold. (For cte_power, marenostrum, nord3 and minotauro clusters or local)", + description="Run Alphafold. (For marenostrum, nord3 clusters or local)", initialAction=initialAlphafold, finalAction=finalAlhafoldAction, variables=BSC_JOB_VARIABLES + [outputAF, removeExistingResults], diff --git a/EAPM/Include/Blocks/AnalyseGlide.py b/EAPM/Include/Blocks/AnalyseGlide.py new file mode 100644 index 0000000..cd3c048 --- /dev/null +++ b/EAPM/Include/Blocks/AnalyseGlide.py @@ -0,0 +1,230 @@ +from HorusAPI import PluginBlock, PluginVariable, VariableGroup, VariableTypes + +# TODO Configure the inputs correctly + +# ==========================# +# Variable inputs +# ==========================# +glideOutputVariable = PluginVariable( + id="glide_output", + name="Glide output", + description="Glide output from the BSC calculations block", + type=VariableTypes.CUSTOM, + allowedValues=["bsc_results"], +) +conservedResidues = PluginVariable( + name="Conserved residues", + id="conserved_indexes", + description="The conserved residues", + type=VariableTypes.CUSTOM, + defaultValue=None, +) +residueProtein = PluginVariable( + name="Atom Protein", + id="resi_id1", + description="Atom of the protein to calculate the distance to", + type=VariableTypes.ATOM, +) +residueLigand = PluginVariable( + name="Atom Ligand", + id="resi_id2", + description="Atom of the ligand to calculate the distance to", + type=VariableTypes.ATOM, +) +resNameProt = PluginVariable( + name="Protein residue name", + id="res_name_prot", + description="The protein residue name", + type=VariableTypes.STRING, + defaultValue="CYS", +) +atomNameProt = PluginVariable( + name="Protein atomname", + id="atom_name_prot", + description="The protein atom name", + type=VariableTypes.STRING, + defaultValue="SG", +) +ligandName = PluginVariable( + name="Ligand name", + id="ligand_name", + description="The ligand name", + type=VariableTypes.STRING, + defaultValue="GSH", +) +atomNameLig = PluginVariable( + name="Ligand atom name", + id="atom_name_ligand", + description="The atom name of the ligand", + type=VariableTypes.STRING, + defaultValue="S1", +) + +stringGroup = VariableGroup( + id="string_input", + name="Input String", + description="The input are in string", + variables=[ + conservedResidues, + glideOutputVariable, + resNameProt, + atomNameProt, + ligandName, + atomNameLig, + ], +) +atomGroup = VariableGroup( + id="atom_input", + name="Input Atom", + description="The input are in atom", + variables=[conservedResidues, glideOutputVariable, residueProtein, residueLigand], +) + +# Output variables +outputModelsVariable = PluginVariable( + id="best_poses", + name="Best poses", + description="The best poses from the analysis", + type=VariableTypes.FOLDER, +) +analyseGlideOutputVariable = PluginVariable( + id="glide_results_output", + name="Glide results output", + description="Output results of the Glide analysis", + type=VariableTypes.CUSTOM, + allowedValues=["glide_output"], +) + +# ==========================# +# Variable +# ==========================# +metricsVar = PluginVariable( + name="Metrics ", + id="metrics", + description="The metrics list", + type=VariableTypes.STRING, + defaultValue="SG_S", +) +removePreviousVar = PluginVariable( + name="Remove previous models", + id="remove_previous", + description="Remove previous", + type=VariableTypes.BOOLEAN, + defaultValue=False, +) +separatorVar = PluginVariable( + name="Separator", + id="separator", + description="The separator", + type=VariableTypes.STRING, + defaultValue="@", +) + + +def finalAction(block: PluginBlock): + + import prepare_proteins + + bsc_result = block.inputs.get(glideOutputVariable.id, None) + folder_to_analyse = bsc_result["dock_folder"] + model_folder = bsc_result["model_folder"] + + conserved_indexes = block.inputs.get(conservedResidues.id, None) + + metrics = block.variables.get("metrics", "SG_S") + remove_previous = block.variables.get("remove_previous", False) + separator = block.variables.get("separator", "@") + + if block.selectedInputGroup == stringGroup.id: + res_name_prot = block.inputs.get(resNameProt.id, "CYS") + atom_name_prot = block.inputs.get(atomNameProt.id, "SG") + ligand_name = block.inputs.get(ligandName.id, "GSH") + atom_name_lig = block.inputs.get(atomNameLig.id, "S1") + else: + residue_protein = block.inputs.get(residueProtein.id, None) + res_name_prot = residue_protein["auth_comp_id"] + atom_name_prot = residue_protein["auth_atom_id"] + residue_ligand = block.inputs.get(residueLigand.id, None) + ligand_name = residue_ligand["auth_comp_id"] + atom_name_lig = residue_ligand["auth_atom_id"] + metrics = f"{atom_name_prot}_{atom_name_lig}" + + models = prepare_proteins.proteinModels(model_folder) + + if conserved_indexes is None: + raise ValueError("Conserved residues must be provided") + if not isinstance(conserved_indexes, dict): + try: + conserved_indexes = int(conserved_indexes) + except ValueError: + raise ValueError("Conserved indexes must be an integer or a dictionary of integers") + conserved_indexes_f = {} + for model in models: + conserved_indexes_f[model] = [conserved_indexes] + conserved_indexes = conserved_indexes_f + + center_atom = {} # Create dictionary to store the atom 3-element tuple for each model + for model in models: # Iterate the models inside the library + # Iterate the residues for each Bio.PDB.Structure object + for r in models.structures[model].get_residues(): + # Check that the residue matches the defined index + # for cons_ind in conserved_indexes[model]: + if r.id[1] in conserved_indexes[model]: + # Assert that the residue has the correct residue identity + if r.resname == res_name_prot: + # Store the corresponsing tuple. + center_atom[model] = (r.get_parent().id, r.id[1], atom_name_prot) + break + + atom_pairs = {} # Define the dictionary containing the atom pairs for each model + for model in models: + atom_pairs[model] = {} + for ligand in [ligand_name]: + atom_pairs[model][ligand] = [] + atom_pairs[model][ligand].append((center_atom[model], atom_name_lig)) + + models.analyseDocking(folder_to_analyse, atom_pairs=atom_pairs, separator=separator) + + metric_distances = {} # Define the global dictionary + metric_distances[metrics] = {} # Define the metric nested dictionary + for model in models: + metric_distances[metrics][model] = {} # Define the model nested dictionary + for ligand in models.docking_ligands[model]: + # Define the ligand nested dictionary with all the docking distances list + metric_distances[metrics][model][ligand] = models.getDockingDistances(model, ligand) + + models.combineDockingDistancesIntoMetrics(metric_distances) + + best_poses = models.getBestDockingPosesIteratively(metric_distances) + + models.extractDockingPoses( + best_poses, + folder_to_analyse, + "best_docking_poses", + separator=separator, + remove_previous=remove_previous, + ) + + block.setOutput(outputModelsVariable.id, "best_docking_poses") + + glideOutput = { + "poses_folder": "best_docking_poses", + "models_folder": model_folder, # "prepared_proteins", + "atom_pairs": atom_pairs, + } + import pickle + + with open("glide_output.pkl", "wb") as f: + pickle.dump(glideOutput, f) + + block.setOutput(analyseGlideOutputVariable.id, glideOutput) + + +AnalyseGBlock = PluginBlock( + name="Analyse Glide", + description="To analyse Glide results", + action=finalAction, + variables=[metricsVar, removePreviousVar, separatorVar], + inputGroups=[atomGroup, stringGroup], + outputs=[outputModelsVariable, analyseGlideOutputVariable], +) diff --git a/EAPM/Include/Blocks/AnalyseGlideDocking.py b/EAPM/Include/Blocks/AnalyseGlideDocking.py index 504bc56..9d1056b 100644 --- a/EAPM/Include/Blocks/AnalyseGlideDocking.py +++ b/EAPM/Include/Blocks/AnalyseGlideDocking.py @@ -1,6 +1,3 @@ -import datetime -import os - from HorusAPI import ( Extensions, PluginBlock, @@ -133,6 +130,10 @@ def analyseDockingAction(block: PluginBlock): + + import datetime + import os + if block.selectedInputGroup == "folder_variable_group": folder_to_analyse = block.inputs.get("docking_folder", "docking") model_folder = block.inputs.get("model_folder", "models") @@ -391,6 +392,7 @@ def analyseDocking( """ import json + import os import pandas as pd import prepare_proteins @@ -529,6 +531,7 @@ def extractDockingPoses( Remove all content in the output folder """ + import os import shutil # Check the separator is not in model or ligand names diff --git a/EAPM/Include/Blocks/AsiteDesign.py b/EAPM/Include/Blocks/AsiteDesign.py index 1bb9b39..ef0b2bd 100644 --- a/EAPM/Include/Blocks/AsiteDesign.py +++ b/EAPM/Include/Blocks/AsiteDesign.py @@ -2,10 +2,7 @@ Module containing the Asitedesign block for the EAPM plugin """ -import os -import subprocess - -from HorusAPI import PluginVariable, SlurmBlock, VariableList, VariableTypes +from HorusAPI import PluginVariable, SlurmBlock, VariableTypes # ==========================# # Variable inputs @@ -34,104 +31,138 @@ defaultValue=None, ) + # ==========================# # Variable outputs # ==========================# -outputFolderAsite = PluginVariable( - name="Asite simulation folder", - id="folder_name", - description="The name of the folder where the simulation will be stored.", - type=VariableTypes.STRING, - defaultValue="AsiteDesign", -) outputModelsAsite = PluginVariable( name="Models", id="models", description="The models generated by the simulation.", type=VariableTypes.FOLDER, - defaultValue="DesignCatalyticSite_job_final_pose", + defaultValue="job_final_pose", ) ############################## # Other variables # ############################## -queue = PluginVariable( - name="Cluster queue", - id="partition", - description="The queue for the simulation", - type=VariableTypes.STRING, - defaultValue="bsc_ls", -) containerAsite = PluginVariable( name="Container", id="container", description="If you are launching the block in a container. The container to use.", - type=VariableTypes.STRING, + type=VariableTypes.FILE, defaultValue=None, ) +outputFolderAsite = PluginVariable( + name="Asite simulation folder", + id="folder_name", + description="The name of the folder where the simulation will be stored.", + type=VariableTypes.STRING, + defaultValue="AsiteDesign", +) +removeExistingResults = PluginVariable( + name="Remove existing results", + id="remove_existing_results", + description="Remove existing results", + type=VariableTypes.BOOLEAN, + defaultValue=False, +) def initialAsite(block: SlurmBlock): + import os + import subprocess + # Get the input variables - input_yaml = block.inputs.get("input_yaml", None) - input_params = block.inputs.get("input_params", None) - input_pdb = block.inputs.get("input_pdb", None) - cpus = block.variables.get("cpus", 0) + input_yaml = block.inputs.get(inputYamlAsite.id, None) + input_params = block.inputs.get(inputParamsAsite.id, None) + input_pdb = block.inputs.get(inputPDBAsite.id, None) container = block.variables.get("container", None) output_file = input_yaml.rstrip(".yaml").split("/")[-1] + ".out" + cpus = block.variables.get("cpus_per_task", 1) + + cwd = os.getcwd() + + if not os.path.exists(input_yaml): + raise Exception(f"Input yaml file {input_yaml} not found") + if not os.path.exists(input_params): + raise Exception(f"Input parameters folder {input_params} not found") + if not os.path.exists(input_pdb): + raise Exception(f"Input pdb file {input_pdb} not found") - # copiar pdb and params to output folder - subprocess.run(["cp", input_yaml, os.getcwd()], check=True) - subprocess.run(["cp", input_pdb, os.getcwd()], check=True) - subprocess.run(["cp", "-r", input_params, os.getcwd()], check=True) + # removeExisting = block.variables.get("remove_existing_results", False) - input_yaml = input_yaml.split("/")[-1] + # if removeExisting and os.path.exists(folder_name): + # os.system("rm -rf " + folder_name) + + # if not removeExisting and os.path.exists(folder_name): + # raise Exception( + # "The folder {} already exists. Please, choose another name or remove it.".format( + # folder_name + # ) + # ) + + # os.makedirs(folder_name, exist_ok=True) + + # # copiar pdb and params to output folder + # subprocess.run(["cp", input_yaml, os.path.join(os.getcwd(), folder_name)], check=True) + # subprocess.run(["cp", input_pdb, os.path.join(os.getcwd(), folder_name)], check=True) + # subprocess.run(["cp", "-r", input_params, os.path.join(os.getcwd(), folder_name)], check=True) + + # input_yaml = folder_name + "/" + os.path.basename(input_yaml) + # input_pdb = folder_name + "/" + os.path.basename(input_pdb) + # input_params = folder_name + "/" + os.path.basename(input_params) + + input_yaml = os.path.basename(input_yaml) cluster = "local" if block.remote.name != "local": cluster = block.remote.host - if "mn" in cluster: - job = f"mpirun -np {cpus} python -m ActiveSiteDesign {input_yaml} > {output_file}" + if "login" in cluster: + job = f"mpirun -n {cpus} python -m ActiveSiteDesign {input_yaml} > {output_file}" elif cluster == "local": if container is None: - job = f"mpirun -np {cpus} python -m ActiveSiteDesign {input_yaml} > {output_file}" + job = f"mpirun -n {cpus} python -m ActiveSiteDesign {input_yaml} > {output_file}" else: if cpus == 0: job = f"singularity exec {container} python -m ActiveSiteDesign {input_yaml} > {output_file}" else: - job = f"mpirun -np {cpus} singularity exec {container} python -m ActiveSiteDesign {input_yaml} > {output_file}" + job = f"mpirun -n {cpus} singularity exec {container} python -m ActiveSiteDesign {input_yaml} > {output_file}" else: - raise Exception("AsiteDesign can only be run on Marenostrum or local") + raise Exception("AsiteDesign can only be run on nord3 or local") from utils import launchCalculationAction - launchCalculationAction(block, [job], "asitedesign", modulePurge=True) + launchCalculationAction( + block, [job], "asitedesign", modulePurge=True # uploadFolders=folder_name, + ) def finalAsiteAction(block: SlurmBlock): + + import os + from utils import downloadResultsAction downloaded_path = downloadResultsAction(block) - resultsFolder = block.outputs["folder_name"] - modelsFolder = block.outputs["models"] - - output_folder = os.path.join(downloaded_path, resultsFolder) - block.setOutput(outputFolderAsite.id, output_folder) + for f in os.listdir(downloaded_path): + if f.endswith("_final_pose"): + resultsFolder = f + break - output_Modelfolder = os.path.join(downloaded_path, f"{resultsFolder}/{modelsFolder}") - block.setOutput(outputModelsAsite.id, output_Modelfolder) + block.setOutput(outputModelsAsite.id, os.path.join(downloaded_path, resultsFolder)) from utils import BSC_JOB_VARIABLES asiteDesignBlock = SlurmBlock( name="AsiteDesign", - description="Run AsiteDesign. (For local or marenostrum)", + description="Run AsiteDesign. (For local or nord3)", initialAction=initialAsite, finalAction=finalAsiteAction, - variables=BSC_JOB_VARIABLES + [containerAsite], + variables=BSC_JOB_VARIABLES + [containerAsite, outputFolderAsite, removeExistingResults], inputs=[inputYamlAsite, inputPDBAsite, inputParamsAsite], - outputs=[outputFolderAsite], + outputs=[outputModelsAsite], ) diff --git a/EAPM/Include/Blocks/ConservedResiduesMSA.py b/EAPM/Include/Blocks/ConservedResiduesMSA.py index 4b7b391..de12d8b 100644 --- a/EAPM/Include/Blocks/ConservedResiduesMSA.py +++ b/EAPM/Include/Blocks/ConservedResiduesMSA.py @@ -1,4 +1,4 @@ -from HorusAPI import PluginBlock, PluginVariable, VariableTypes, Extensions +from HorusAPI import Extensions, PluginBlock, PluginVariable, VariableTypes proteinFolderVariable = PluginVariable( id="protein_folder", @@ -25,13 +25,13 @@ def getConservedMSAPositions(block: PluginBlock): - proteinFolder = block.inputs.get("protein_folder", "proteins") - - import prepare_proteins + proteinFolder = block.inputs.get(proteinFolderVariable.id, "proteins") # Check that there is at least one pdb file in the folder import os + import prepare_proteins + hasPDB = False for file in os.listdir(proteinFolder): if file.endswith(".pdb"): @@ -73,7 +73,7 @@ def hookSubprocessMafft(command, **kwargs): Extensions().loadHTML(html, title="Conserved residues") # Get the residue index to get - residueIndexes = block.variables.get("residue_index", []) + residueIndexes = block.variables.get(residueIndexToGetVariable.id, []) if residueIndexes is None or len(residueIndexes) == 0: # Get all the indexes @@ -90,7 +90,7 @@ def hookSubprocessMafft(command, **kwargs): if len(conservedResidues[model]) == 0: raise Exception( "There are no conserved residues for the selected indexes: " - + " ".join(residueIndexes) + + " ".join(str(residueIndexes)) ) break diff --git a/EAPM/Include/Blocks/EpPred.py b/EAPM/Include/Blocks/EpPred.py new file mode 100644 index 0000000..705d77b --- /dev/null +++ b/EAPM/Include/Blocks/EpPred.py @@ -0,0 +1,348 @@ +from HorusAPI import PluginVariable, SlurmBlock, VariableTypes + +# TODO Making the block to work in marenostrum, if not, will work in local. +# TODO Add to documentation +# For the mn execution set default paths + +# ==========================# +# Variable inputs +# ==========================# +inputFasta = PluginVariable( + name="Input fasta", + id="input_fasta", + description="The input fasta file. (-i)", + type=VariableTypes.FILE, + defaultValue=None, + allowedValues=["fasta"], +) + +# ==========================# +# Variable outputs +# ==========================# +outputEppred = PluginVariable( + name="EP-Pred output", + id="path", + description="The folder containing the results.", + type=VariableTypes.FOLDER, +) + +############################## +# Other variables # +############################## +removeExistingResults = PluginVariable( + name="Remove existing results", + id="remove_existing_results", + description="Remove existing results", + type=VariableTypes.BOOLEAN, + defaultValue=False, +) +pssmDir = PluginVariable( + name="PSSM directory", + id="pssm_dir", + description="The directory containing the PSSM files. (-p)", + type=VariableTypes.FOLDER, + defaultValue=None, +) +fastadir = PluginVariable( + name="Fasta directory", + id="fasta_dir", + description="The directory containing the fasta files. (-f)", + type=VariableTypes.FOLDER, + defaultValue=None, +) +ifeatureDir = PluginVariable( + name="Ifeature directory", + id="ifeature_dir", + description="The directory containing the ifeature files. (-id)", + type=VariableTypes.FOLDER, + defaultValue=None, +) +possumDir = PluginVariable( + name="Possum directory", + id="possum_dir", + description="The directory containing the possum files. (-Po)", + type=VariableTypes.FOLDER, + defaultValue=None, +) +ifeatureOut = PluginVariable( + name="Ifeature out", + id="ifeature_out", + description="The directory where the ifeature features are. (-io)", + type=VariableTypes.FOLDER, + defaultValue=None, +) +possumOut = PluginVariable( + name="Possum out", + id="possum_out", + description="The directory for the possum extractions. (-po)", + type=VariableTypes.FOLDER, + defaultValue=None, +) +filteredOut = PluginVariable( + name="Filtered output", + id="filtered_out", + description="The directory for the filtered features. (-fo)", + type=VariableTypes.FOLDER, + defaultValue=None, +) +dbinp = PluginVariable( + name="Database input", + id="dbinp", + description="The path to the fasta files to create the database. (-di)", + type=VariableTypes.FOLDER, + defaultValue=None, +) +dbout = PluginVariable( + name="Database output", + id="dbout", + description="The path and name of the created database. (-do)", + type=VariableTypes.FOLDER, + defaultValue=None, +) +numThread = PluginVariable( + name="Number of threads", + id="num_thread", + description="The number of threads to use for the generation of pssm profiles and feature extraction. (-n)", + type=VariableTypes.INTEGER, + defaultValue=5, +) +resDir = PluginVariable( + name="Result directory", + id="res_dir", + description="The name for the folder where to store the prediction results. (-rs)", + type=VariableTypes.FOLDER, + defaultValue="results", +) +numSimilarSamples = PluginVariable( + name="Number of similar samples", + id="num_similar_samples", + description="The number of similar training samples to filter the predictions. (-nss)", + type=VariableTypes.INTEGER, + defaultValue=None, +) +restart = PluginVariable( + name="Restart", + id="restart", + description="From which part of the process to restart with. (-re)", + type=VariableTypes.STRING_LIST, + allowedValues=["feature", "predict"], +) +filterOnly = PluginVariable( + name="Filter only", + id="filter_only", + description="True if you already have the features. (-on)", + type=VariableTypes.BOOLEAN, + defaultValue=None, +) +extractionRestart = PluginVariable( + name="Extraction restart", + id="extraction_restart", + description="The file to restart the extraction with. (-er)", + type=VariableTypes.FILE, +) +long = PluginVariable( + name="Long", + id="long", + description="True when restarting from the long commands. (-lg)", + type=VariableTypes.BOOLEAN, + defaultValue=None, +) +run = PluginVariable( + name="Run", + id="run", + description="Run possum or ifeature extraction (-r)", + type=VariableTypes.STRING, + defaultValue=None, + allowedValues=["possum", "ifeature", "both"], +) +start = PluginVariable( + name="Start", + id="start", + description="The starting number. (-st)", + type=VariableTypes.INTEGER, + defaultValue=None, +) +end = PluginVariable( + name="End", + id="end", + description="The ending number, not included. (-en)", + type=VariableTypes.INTEGER, + defaultValue=None, +) +sbatchPath = PluginVariable( + name="Sbatch path", + id="sbatch_path", + description="The folder to keep the run files for generating pssm. (-sp)", + type=VariableTypes.FOLDER, + defaultValue=None, +) +value = PluginVariable( + name="Value", + id="value", + description="The voting threshold to be considered positive. (-v)", + type=VariableTypes.NUMBER_LIST, + defaultValue=None, + allowedValues=[1, 0.8, 0.5], +) +iterations = PluginVariable( + name="Iterations", + id="iterations", + description="The number of iterations in the PSIBlast. (-iter)", + type=VariableTypes.INTEGER, + defaultValue=None, +) + + +def runEppred(block: SlurmBlock): + + import os + + inputfasta = block.inputs.get("input_fasta", None) + + if inputfasta is None: + raise Exception("No input fasta provided") + if not os.path.exists(inputfasta): + raise Exception(f"The input fasta file does not exist: {inputfasta}") + + command = "python -m ep_pred.Launch " + command += f"-i {inputfasta} " + command += f"-n {block.variables.get('num_thread', 5)} " + command += f"-re {block.variables.get('restart', 'predict')} " + + pssm_dir = block.variables.get("pssm_dir", None) + if pssm_dir is not None: + command += f"-p {pssm_dir} " + fasta_dir = block.variables.get("fasta_dir", None) + if fasta_dir is not None: + command += f"-f {fasta_dir} " + ifeature_dir = block.variables.get("ifeature_dir", None) + if ifeature_dir is not None: + command += f"-id {ifeature_dir} " + possum_dir = block.variables.get("possum_dir", None) + if possum_dir is not None: + command += f"-Po {possum_dir} " + ifeature_out = block.variables.get("ifeature_out", None) + if ifeature_out is not None: + command += f"-io {ifeature_out} " + possum_out = block.variables.get("possum_out", None) + if possum_out is not None: + command += f"-po {possum_out} " + filtered_out = block.variables.get("filtered_out", None) + if filtered_out is not None: + command += f"-fo {filtered_out} " + dbinp = block.variables.get("dbinp", None) + if dbinp is not None: + command += f"-di {dbinp} " + dbout = block.variables.get("dbout", None) + if dbout is not None: + command += f"-do {dbout} " + res_dir = block.variables.get("res_dir", None) + if res_dir is not None: + command += f"-rs {res_dir} " + num_similar_samples = block.variables.get("num_similar_samples", None) + if num_similar_samples is not None: + command += f"-nss {num_similar_samples} " + restart = block.variables.get("restart", "feature") + if restart is not None: + command += f"-re {restart} " + filter_only = block.variables.get("filter_only", None) + if filter_only is not None: + command += f"-on {filter_only} " + extraction_restart = block.variables.get("extraction_restart", None) + if extraction_restart is not None: + command += f"-er {extraction_restart} " + long = block.variables.get("long", None) + if long is not None: + command += f"-lg {long} " + run = block.variables.get("run", None) + if run is not None: + command += f"-r {run} " + start = block.variables.get("start", None) + if start is not None: + command += f"-st {start} " + end = block.variables.get("end", None) + if end is not None: + command += f"-en {end} " + sbatch_path = block.variables.get("sbatch_path", None) + if sbatch_path is not None: + command += f"-sp {sbatch_path} " + value = block.variables.get("value", None) + if value is not None: + command += f"-v {value} " + iterations = block.variables.get("iterations", None) + if iterations is not None: + command += f"-iter {iterations} " + + jobs = [command] + + folderName = block.variables.get("folder_name", "epPred") + block.extraData["folder_name"] = folderName + removeExisting = block.variables.get("remove_existing_results", False) + + # If folder already exists, raise exception + if removeExisting and os.path.exists(folderName): + os.system("rm -rf " + folderName) + + if not removeExisting and os.path.exists(folderName): + raise Exception( + "The folder {} already exists. Please, choose another name or remove it.".format( + folderName + ) + ) + + # Create an copy the inputs + os.makedirs(folderName, exist_ok=True) + os.system(f"cp {inputfasta} {folderName}") + + from utils import launchCalculationAction + + launchCalculationAction( + block, + jobs, + program="epPred", + uploadFolders=[ + folderName, + ], + ) + + +def finalAction(block: SlurmBlock): + pass + + +from utils import BSC_JOB_VARIABLES + +epPredBlock = SlurmBlock( + name="Ep-pred", + initialAction=runEppred, + finalAction=finalAction, + description="A machine learning program to predict promiscuity of esterases.", + inputs=[inputFasta], + variables=BSC_JOB_VARIABLES + + [ + removeExistingResults, + pssmDir, + fastadir, + ifeatureDir, + possumDir, + ifeatureOut, + possumOut, + filteredOut, + dbinp, + dbout, + numThread, + resDir, + numSimilarSamples, + restart, + filterOnly, + extractionRestart, + long, + run, + start, + end, + sbatchPath, + value, + iterations, + ], + outputs=[outputEppred], +) diff --git a/EAPM/Include/Blocks/HmmAlign.py b/EAPM/Include/Blocks/HmmAlign.py index d494eb1..b993faa 100644 --- a/EAPM/Include/Blocks/HmmAlign.py +++ b/EAPM/Include/Blocks/HmmAlign.py @@ -2,8 +2,6 @@ Module containing the HmmAlign block for the EAPM plugin as a nord3 implementation """ -import os - from HorusAPI import PluginVariable, SlurmBlock, VariableTypes # ==========================# @@ -52,6 +50,8 @@ def runHmmAlign(block: SlurmBlock): + import os + inputfasta = block.inputs.get("input_fasta", None) inputhmm = block.inputs.get("input_hmm", None) @@ -88,7 +88,12 @@ def runHmmAlign(block: SlurmBlock): os.system(f"cp {inputfasta} {folderName}") os.system(f"cp {inputhmm} {folderName}") - jobs = [f"hmmalign {folderName}/{inputhmm} {folderName}/{inputfasta}"] + if block.remote.isLocal: + hmmerExecutable = block.config.get("hmmer_path", "hmmer") + "/hmmalign" + else: + hmmerExecutable = "hmmalign" + + jobs = [f"{hmmerExecutable} {folderName}/{inputhmm} {folderName}/{inputfasta}"] from utils import launchCalculationAction @@ -103,6 +108,8 @@ def runHmmAlign(block: SlurmBlock): def finalAction(block: SlurmBlock): + import os + from utils import downloadResultsAction downloaded_path = downloadResultsAction(block) diff --git a/EAPM/Include/Blocks/HmmBuild.py b/EAPM/Include/Blocks/HmmBuild.py index 6d5a3cc..9f54e7c 100644 --- a/EAPM/Include/Blocks/HmmBuild.py +++ b/EAPM/Include/Blocks/HmmBuild.py @@ -2,8 +2,6 @@ Module containing the HmmBuild block for the EAPM plugin as a nord3 implementation """ -import os - from HorusAPI import PluginVariable, SlurmBlock, VariableTypes # ==========================# @@ -45,6 +43,8 @@ def runHmmBuild(block: SlurmBlock): + import os + input = block.inputs.get("input_msa", None) if "nord3" not in block.remote.host: @@ -76,7 +76,12 @@ def runHmmBuild(block: SlurmBlock): output = block.outputs.get("output", "output.hmm") - jobs = [f"hmmbuild {folderName}/{output} {folderName}/{input}"] + if block.remote.isLocal: + hmmerExecutable = block.config.get("hmmer_path", "hmmer") + "/hmmbuild" + else: + hmmerExecutable = "hmmbuild" + + jobs = [f"{hmmerExecutable} {folderName}/{output} {folderName}/{input}"] from utils import launchCalculationAction @@ -91,6 +96,8 @@ def runHmmBuild(block: SlurmBlock): def finalAction(block: SlurmBlock): + import os + from utils import downloadResultsAction downloaded_path = downloadResultsAction(block) diff --git a/EAPM/Include/Blocks/HmmScan.py b/EAPM/Include/Blocks/HmmScan.py index 7dc939a..208228f 100644 --- a/EAPM/Include/Blocks/HmmScan.py +++ b/EAPM/Include/Blocks/HmmScan.py @@ -2,8 +2,6 @@ Module containing the HmmScan block for the EAPM plugin as a nord3 implementation """ -import os - from HorusAPI import PluginVariable, SlurmBlock, VariableTypes # ==========================# @@ -50,6 +48,7 @@ def runHmmScan(block: SlurmBlock): + import os input = block.inputs.get("input_fasta", None) @@ -83,7 +82,12 @@ def runHmmScan(block: SlurmBlock): hmmDB = block.variables.get("hmm_db", None) output = block.outputs.get("output", "output.hmm") - jobs = [f"hmmscan {hmmDB} {folderName}/{input} -o {folderName}/{output}"] + if block.remote.isLocal: + hmmerExecutable = block.config.get("hmmer_path", "hmmer") + "/hmmscan" + else: + hmmerExecutable = "hmmscan" + + jobs = [f"{hmmerExecutable} {hmmDB} {folderName}/{input} -o {folderName}/{output}"] from utils import launchCalculationAction @@ -98,6 +102,8 @@ def runHmmScan(block: SlurmBlock): def finalAction(block: SlurmBlock): + import os + from utils import downloadResultsAction downloaded_path = downloadResultsAction(block) diff --git a/EAPM/Include/Blocks/HmmSearch.py b/EAPM/Include/Blocks/HmmSearch.py index 10d6fb2..4c46c30 100644 --- a/EAPM/Include/Blocks/HmmSearch.py +++ b/EAPM/Include/Blocks/HmmSearch.py @@ -2,8 +2,6 @@ Module containing the HmmSearch block for the EAPM plugin as a nord3 implementation """ -import os - from HorusAPI import PluginVariable, SlurmBlock, VariableTypes # ==========================# @@ -58,6 +56,8 @@ def runHmmSearch(block: SlurmBlock): + import os + input = block.inputs.get("input_hmm", None) if "nord3" not in block.remote.host: @@ -94,8 +94,13 @@ def runHmmSearch(block: SlurmBlock): "sequence_db", "/gpfs/projects/shared/public/AlphaFold/uniref90/uniref90.fa" ) + if block.remote.isLocal: + hmmerExecutable = block.config.get("hmmer_path", "hmmer") + "/hmmsearch" + else: + hmmerExecutable = "hmmsearch" + jobs = [ - f"hmmsearch --cpu {cpus} -E {evalue} {folderName}/{input} {sequenceDB} -o {folderName}/{output}" + f"{hmmerExecutable} --cpu {cpus} -E {evalue} {folderName}/{input} {sequenceDB} -o {folderName}/{output}" ] from utils import launchCalculationAction @@ -111,6 +116,8 @@ def runHmmSearch(block: SlurmBlock): def finalAction(block: SlurmBlock): + import os + from utils import downloadResultsAction downloaded_path = downloadResultsAction(block) diff --git a/EAPM/Include/Blocks/HmmSearchLocal.py b/EAPM/Include/Blocks/HmmSearchLocal.py index bd6da5e..b8eeffa 100644 --- a/EAPM/Include/Blocks/HmmSearchLocal.py +++ b/EAPM/Include/Blocks/HmmSearchLocal.py @@ -2,10 +2,7 @@ Module containing the HmmSearch block for the EAPM plugin as a local implementation """ -import os -import pyhmmer -from HorusAPI import PluginBlock, PluginVariable, VariableTypes, Extensions - +from HorusAPI import Extensions, PluginBlock, PluginVariable, VariableTypes # ==========================# # Variable inputs @@ -38,47 +35,51 @@ allowedValues=["domtbl"], ) + def runHmmSearch(block: PluginBlock): - + import os + + import pyhmmer + input = block.inputs.get("input_hmm", None) - + if input is None: raise Exception("No input hmm provided") - + if not os.path.exists(input): raise Exception(f"The input hmm file does not exist: {input}") - + try: with pyhmmer.plan7.HMMFile(input) as hmm_file: hmm = hmm_file.read() except Exception as e: raise Exception(f"Error reading the input hmm file: {e}") - + alphabet = pyhmmer.plan7.Alphabet.amino() background = pyhmmer.plan7.Background(alphabet) pipeline = pyhmmer.plan7.Pipeline(alphabet, background=background) - + sequenceDB = block.inputs.get("sequence_db", None) - + if sequenceDB is None: raise Exception("No sequence database provided") - + if not os.path.exists(sequenceDB): raise Exception(f"The sequence database file does not exist: {sequenceDB}") - + try: with pyhmmer.easel.SequenceFile(sequenceDB, digital=True, alphabet=alphabet) as seq_file: hits = pipeline.search_hmm(hmm, seq_file) except Exception as e: raise Exception(f"Error searching the sequence database: {e}") - + output = block.outputs.get("output", "output.domtbl") - + with open(output, "wb") as f: hits.write(f, format="domains") - + block.setOutput("outputVariable", output) - + hmmsearchLocalBlock = PluginBlock( name="HmmSearch Local", diff --git a/EAPM/Include/Blocks/JackHmmer.py b/EAPM/Include/Blocks/JackHmmer.py index f14d47d..b2f01f9 100644 --- a/EAPM/Include/Blocks/JackHmmer.py +++ b/EAPM/Include/Blocks/JackHmmer.py @@ -2,8 +2,6 @@ Module containing the JackHmmer block for the EAPM plugin as a nord3 implementation """ -import os - from HorusAPI import PluginVariable, SlurmBlock, VariableTypes # ==========================# @@ -44,17 +42,25 @@ id="sequence_db", name="Sequence DB", description="The sequence database to search", - type=VariableTypes.STRING, - defaultValue="/gpfs/projects/shared/public/AlphaFold/uniref90/uniref90.fa", + type=VariableTypes.FILE, + defaultValue="/apps/ACC/ALPHAFOLD/SRC/database/Alphafold/uniref90/uniref90.fasta", +) +folderNameVar = PluginVariable( + id="folder_name", + name="Folder name", + description="The folder name", + type=VariableTypes.FOLDER, + defaultValue="jackHmmer", ) def runJackHmmer(block: SlurmBlock): + import os inputfasta = block.inputs.get("input_fasta", None) - if "nord3" not in block.remote.host: - raise Exception("This block only works on Nord3.") + # if "nord3" not in block.remote.host or "glogin" not in block.remote.host: + # raise Exception("This block only works on Nord3 or mn.") if inputfasta is None: raise Exception("No input fasta provided") @@ -71,7 +77,7 @@ def runJackHmmer(block: SlurmBlock): if not removeExisting and os.path.exists(folderName): raise Exception( - "The folder {} already exists. Please, choose another name or remove it.".format( + "The folder {} already exists. Please, choose another name or remove it with the RemoveExistingFolder option.".format( folderName ) ) @@ -80,15 +86,19 @@ def runJackHmmer(block: SlurmBlock): os.makedirs(folderName, exist_ok=True) os.system(f"cp {inputfasta} {folderName}") + inputfasta = os.path.join(folderName, os.path.basename(inputfasta)) + output = block.outputs.get("output", "output.hmm") sequenceDB = block.variables.get( - "sequence_db", "/gpfs/projects/shared/public/AlphaFold/uniref90/uniref90.fa" + "sequence_db", "/apps/ACC/ALPHAFOLD/SRC/database/Alphafold/uniref90/uniref90.fasta" ) cpus = block.variables.get("cpus", 1) - jobs = [ - f"jackhmmer -o {folderName}/{output} --cpu {cpus} {folderName}/{inputfasta} {sequenceDB}" - ] + if block.remote.isLocal: + hmmerExecutable = block.config.get("hmmer_path", "hmmer") + "/jackhmmer" + else: + hmmerExecutable = "jackhmmer" + jobs = [f"{hmmerExecutable} -o {folderName}/{output} --cpu {cpus} {inputfasta} {sequenceDB}"] from utils import launchCalculationAction @@ -103,6 +113,8 @@ def runJackHmmer(block: SlurmBlock): def finalAction(block: SlurmBlock): + import os + from utils import downloadResultsAction downloaded_path = downloadResultsAction(block) @@ -122,6 +134,6 @@ def finalAction(block: SlurmBlock): finalAction=finalAction, description="Iteratively search a protein sequence against a protein database", inputs=[fastaInput], - variables=BSC_JOB_VARIABLES + [sequenceDBVar, removeExistingResults], + variables=BSC_JOB_VARIABLES + [sequenceDBVar, removeExistingResults, folderNameVar], outputs=[outputVariable], ) diff --git a/EAPM/Include/Blocks/MSA2HMM.py b/EAPM/Include/Blocks/MSA2HMM.py index dcdcf60..d1c0d7d 100644 --- a/EAPM/Include/Blocks/MSA2HMM.py +++ b/EAPM/Include/Blocks/MSA2HMM.py @@ -2,11 +2,7 @@ Module containing the MSA2HMM block for the EAPM plugin """ -import pyhmmer -import os - -from HorusAPI import PluginBlock, PluginVariable, VariableTypes, VariableGroup - +from HorusAPI import PluginBlock, PluginVariable, VariableGroup, VariableTypes # ==========================# # Variable inputs @@ -44,36 +40,39 @@ # ==========================# - def convertMSA2HMM(block: PluginBlock): """ Convert MSA to HMM """ + import os + + import pyhmmer # Loading plugin variables inputMSA = block.inputs.get("input_file_msa") if inputMSA is None: raise Exception("No input MSA provided") - + if not os.path.exists(inputMSA): raise Exception(f"The input MSA file does not exist: {inputMSA}") - + alphabet = pyhmmer.easel.Alphabet.amino() - + with pyhmmer.easel.MSAFile(inputMSA, digital=True, alphabet=alphabet) as msa_file: msa = msa_file.read() msa.name = b"input_msa" - + builder = pyhmmer.plan7.Builder(alphabet) background = pyhmmer.plan7.Background(alphabet) hmm, _, _ = builder.build_msa(msa, background) - + output = "output.hmm" with open(output, "wb") as output_file: hmm.write(output_file) - + block.setOutput("output_hmm", output) - + + convertMSAToHMMBlock = PluginBlock( name="MSA to HMM", description="Convert MSA files to HMM", diff --git a/EAPM/Include/Blocks/Mafft.py b/EAPM/Include/Blocks/Mafft.py index 473c19e..b1e764d 100644 --- a/EAPM/Include/Blocks/Mafft.py +++ b/EAPM/Include/Blocks/Mafft.py @@ -2,10 +2,7 @@ Module containing the Mafft block for the EAPM plugin """ -import Bio.AlignIO -import Bio.SeqIO - -from HorusAPI import Extensions, PluginBlock, PluginVariable, VariableTypes +from HorusAPI import PluginBlock, PluginVariable, VariableTypes # ==========================# # Variable inputs diff --git a/EAPM/Include/Blocks/PDBToMAE.py b/EAPM/Include/Blocks/PDBToMAE.py index a1ed9a9..2b29690 100644 --- a/EAPM/Include/Blocks/PDBToMAE.py +++ b/EAPM/Include/Blocks/PDBToMAE.py @@ -1,6 +1,3 @@ -import os -import shutil - from HorusAPI import PluginBlock, PluginVariable, VariableGroup, VariableTypes # Input variables @@ -45,19 +42,39 @@ def convertPDBToMAE(block: PluginBlock): + import os + import shutil + # Test if we have valid glide installation command = "echo $SCHRODINGER" output = block.remote.remoteCommand(command) + if output is None or output == "": raise Exception(f"No valid Schrodinger installation found on remote {block.remote.name}") else: print(f"Schrodinger installation found on remote {block.remote.name}: {output}") - run_command = output + "/run" + run_command = str(output) + "/run" import prepare_proteins - pdb_folder = block.inputs.get("pdb_folder", None) + if block.selectedInputGroup == singlePDBVariable.id: + pdb_file = block.inputs.get("single_pdb", None) + + if pdb_file is None: + raise Exception("No PDB file selected") + + if not os.path.isfile(pdb_file): + raise Exception(f"Invalid PDB file: {pdb_file}") + + if os.path.exists("tmp_ligand"): + shutil.rmtree("tmp_ligand") + os.mkdir("tmp_ligand") + shutil.copy(pdb_file, "tmp_ligand") + + pdb_folder = os.path.join(os.getcwd(), "tmp_ligand") + else: + pdb_folder = block.inputs.get("pdb_folder", None) if pdb_folder is None: raise Exception("No PDB folder selected") @@ -150,6 +167,12 @@ def mockSystem(command): if model.endswith(".mae"): os.rename(os.path.join(pdb_folder, model), os.path.join(mae_folder, model)) + elif block.remote.name == "Local": + for model in os.listdir(pdb_folder): + if model.endswith(".mae"): + # Move the MAE files to the output folder + shutil.move(os.path.join(pdb_folder, model), os.path.join(mae_folder, model)) + print( f"Sucessfully converted PDB files to MAE. Files converted: {len(os.listdir(mae_folder))}" ) @@ -161,12 +184,6 @@ def mockSystem(command): name="PDB to MAE", description="Convert PDB files to MAE for Glide", inputGroups=[ - VariableGroup( - id=structureVariable.id, - name=structureVariable.name, - description=structureVariable.description, - variables=[structureVariable], - ), VariableGroup( id=singlePDBVariable.id, name=singlePDBVariable.name, @@ -179,6 +196,12 @@ def mockSystem(command): description=pdbFolderVariable.description, variables=[pdbFolderVariable], ), + VariableGroup( + id=structureVariable.id, + name=structureVariable.name, + description=structureVariable.description, + variables=[structureVariable], + ), ], variables=[changeLigandNameVariable], outputs=[outputVariable], diff --git a/EAPM/Include/Blocks/PeleEAPM.py b/EAPM/Include/Blocks/PeleEAPM.py index ddb9136..636fec9 100644 --- a/EAPM/Include/Blocks/PeleEAPM.py +++ b/EAPM/Include/Blocks/PeleEAPM.py @@ -1,4 +1,4 @@ -import random + from HorusAPI import (PluginVariable, SlurmBlock, VariableGroup, VariableList, VariableTypes) @@ -9,7 +9,7 @@ name="PELE yaml", description="YAML file containing the PELE configuration", type=VariableTypes.FILE, - defaultValue="cst_input.yaml", + defaultValue="input.yaml", allowedValues=["yaml"], ) @@ -25,7 +25,7 @@ id="poses_folder", name="Best docking poses", description="Best docking poses to analyse", - type=VariableTypes.FOLDER + type=VariableTypes.FOLDER, ) glideOutputVariable = PluginVariable( @@ -41,7 +41,7 @@ id="folder_input_group", name="Folder input group", description="Input the model and ligand folders after a Dcoking Grid setup has been run", - variables=[modelFolderVariable, posesFolderVariable, yamlPELEFileVariable] + variables=[modelFolderVariable, posesFolderVariable, yamlPELEFileVariable], ) glideOutputGroup = VariableGroup( @@ -168,7 +168,7 @@ name="PELE separator", description="Separator for the PELE models and ligands", type=VariableTypes.STRING, - defaultValue="-", + defaultValue="@", category="PELE", ) @@ -326,7 +326,7 @@ description="Enable log file", type=VariableTypes.BOOLEAN, defaultValue=False, - category="PELE" + category="PELE", ) rescoringVariable = PluginVariable( @@ -335,7 +335,7 @@ description="Enable rescoring", type=VariableTypes.BOOLEAN, defaultValue=False, - category="PELE" + category="PELE", ) epsilonVariable = PluginVariable( @@ -344,7 +344,7 @@ description="TODO Epsilon description", type=VariableTypes.FLOAT, defaultValue=0.5, - category="PELE" + category="PELE", ) ligandEquilibrationCstVariable = PluginVariable( @@ -353,7 +353,7 @@ description="TODO Ligand equilibration cst description", type=VariableTypes.BOOLEAN, defaultValue=True, - category="PELE" + category="PELE", ) covalentSetupVariable = PluginVariable( @@ -362,7 +362,7 @@ description="Enable covalent setup", type=VariableTypes.BOOLEAN, defaultValue=False, - category="PELE" + category="PELE", ) nonbondedNewFlagVariable = PluginVariable( @@ -371,7 +371,7 @@ description="Enable nonbonded new flag", type=VariableTypes.BOOLEAN, defaultValue=False, - category="PELE" + category="PELE", ) onlyModelsVariable = PluginVariable( @@ -414,12 +414,12 @@ category="PELE", ) -membraneResiduesVariable= PluginVariable( +membraneResiduesVariable = PluginVariable( id="membrane_residues", name="Membrane residues", description="TODO membrane residues description", type=VariableTypes.LIST, - category="PELE" + category="PELE", ) biasToPointVariable = PluginVariable( @@ -427,7 +427,7 @@ name="Bias to point", description="TODO bias_to_point description", type=VariableTypes.LIST, - category="PELE" + category="PELE", ) comBias1Variable = PluginVariable( @@ -435,7 +435,7 @@ name="com bias1", description="TODO com_bias1 description", type=VariableTypes.LIST, - category="PELE" + category="PELE", ) comBias2Variable = PluginVariable( @@ -443,7 +443,7 @@ name="com bias2", description="TODO com_bias2 description", type=VariableTypes.LIST, - category="PELE" + category="PELE", ) ligandTemplateVariable = PluginVariable( @@ -478,35 +478,35 @@ id="model", name="Model", description="TODO model variable description", - type=VariableTypes.STRING + type=VariableTypes.STRING, ) ligandVariable = PluginVariable( id="ligand", name="Ligand", description="TODO ligand variable description", - type=VariableTypes.STRING + type=VariableTypes.STRING, ) chainVariable = PluginVariable( id="chain", name="Chain", description="TODO chain variable description", - type=VariableTypes.STRING + type=VariableTypes.STRING, ) residueVariable = PluginVariable( id="residue", name="Residue number", description="TODO residue number variable description", - type=VariableTypes.INTEGER + type=VariableTypes.INTEGER, ) atomNameVariable = PluginVariable( id="atom_name", name="Atom name", description="TODO atom name variable description", - type=VariableTypes.STRING + type=VariableTypes.STRING, ) # box_centers VariableList @@ -515,13 +515,7 @@ name="Box centers", description="TODO Box center variable description", category="PELE", - prototypes=[ - modelVariable, - ligandVariable, - chainVariable, - residueVariable, - atomNameVariable - ], + prototypes=[modelVariable, ligandVariable, chainVariable, residueVariable, atomNameVariable], ) # Outputs @@ -533,9 +527,13 @@ ) - def peleAction(block: SlurmBlock): if block.selectedInputGroup == "glide_output_group": + glide_outputr = block.inputs.get("glide_output") + # load the pickle file + + # with open(glide_outputr, "rb") as f: + # glide_output = pickle.load(f) glide_output = block.inputs.get("glide_output") poses_folder = glide_output.get("poses_folder") models_folder = glide_output.get("models_folder") @@ -549,7 +547,7 @@ def peleAction(block: SlurmBlock): atom_pairs = {} # Get all the variables from the block - boxCentersValue = block.variables.get("box_centers", []) + boxCentersValue = block.variables.get("box_centers", None) boxRadiusValue = block.variables.get("box_radius", 10) constraintsValue = block.variables.get("constraints", []) ligandIndexValue = block.variables.get("ligand_index", 1) @@ -558,7 +556,7 @@ def peleAction(block: SlurmBlock): peleIterationsValue = block.variables.get("pele_iterations", 5) equilibrationStepsValue = block.variables.get("equilibration_steps", 100) ligandEnergyGroupsValue = block.variables.get("ligand_energy_groups", []) - peleSeparatorValue = block.variables.get("pele_separator", "-") + peleSeparatorValue = block.variables.get("pele_separator", "@") usePeleffyValue = block.variables.get("use_peleffy", True) useSrunValue = block.variables.get("use_srun", True) energyByResidueValue = block.variables.get("energy_by_residue", False) @@ -579,7 +577,7 @@ def peleAction(block: SlurmBlock): onlyModelsValue = block.variables.get("only_models", []) onlyLigandsValue = block.variables.get("only_ligands", []) onlyCombinationsValue = block.variables.get("only_combinations", []) - nonbondedEnergyValue = block.variables.get('nonbonded_energy', {}) + nonbondedEnergyValue = block.variables.get("nonbonded_energy", {}) ligandTemplateValue = block.variables.get("ligand_template", "") seedValue = block.variables.get("seed", -1) logFileValue = block.variables.get("log_file", False) @@ -595,19 +593,31 @@ def peleAction(block: SlurmBlock): comBias2Value = block.variables.get("com_bias2", {}) # Parse spawningValue - validSpawnings = ['independent', 'inverselyProportional', 'epsilon', 'variableEpsilon', - 'independentMetric', 'UCB', 'FAST', 'ProbabilityMSM', 'MetastabilityMSM', - 'IndependentMSM'] - + validSpawnings = [ + "independent", + "inverselyProportional", + "epsilon", + "variableEpsilon", + "independentMetric", + "UCB", + "FAST", + "ProbabilityMSM", + "MetastabilityMSM", + "IndependentMSM", + ] + if spawningValue != None and spawningValue not in validSpawnings: - message = 'Spawning method %s not found.' % spawningValue - message = 'Allowed options are: ' + str(validSpawnings) - raise ValueError(message) + message = "Spawning method %s not found." % spawningValue + message = "Allowed options are: " + str(validSpawnings) + raise ValueError(message) # Parse energyByResidueValue - energy_by_residue_types = ['all', 'lennard_jones', 'sgb', 'electrostatic'] + energy_by_residue_types = ["all", "lennard_jones", "sgb", "electrostatic"] if energyByResidueTypeValue not in energy_by_residue_types: - raise ValueError('%s not found. Try: %s' % (energyByResidueTypeValue, energy_by_residue_types)) + raise ValueError( + "%s not found. Try: %s" % (energyByResidueTypeValue, energy_by_residue_types) + ) + import random # Parse seedValue if seedValue == -1: @@ -616,44 +626,49 @@ def peleAction(block: SlurmBlock): # Parse ligandEnergyGroups if not isinstance(ligandEnergyGroupsValue, type(None)): if not isinstance(ligandEnergyGroupsValue, dict): - raise ValueError('Ligand energy groups, must be given as a dictionary') - - # Parse box_centers + raise ValueError("Ligand energy groups, must be given as a dictionary") - box_centers = {} - for model in boxCentersValue: - box_centers[(model['model'], model['ligand'])] = (model['chain'], model['residue'], model['atom_name']) + # Parse box_centers + if not isinstance(boxCentersValue, type(None)) or boxCentersValue is not None: + box_centers = {} + for model in boxCentersValue: + box_centers[(model["model"], model["ligand"])] = ( + model["chain"], + model["residue"], + model["atom_name"], + ) + else: + box_centers = None # Parse skip_models if not isinstance(skipModelsValue, type(None)): if not isinstance(skipModelsValue, list): - raise ValueError('skip_models must be a list.') + raise ValueError("skip_models must be a list.") - # Parse skip_ligands if not isinstance(skipLigandsValue, type(None)): if not isinstance(skipLigandsValue, list): - raise ValueError('skip_ligands must be a list.') + raise ValueError("skip_ligands must be a list.") # Parse nonbonded_energy if not isinstance(nonbondedEnergyValue, type(None)): if not isinstance(nonbondedEnergyValue, dict): - raise ValueError('nonbonded_energy, must be given as a dictionary') + raise ValueError("nonbonded_energy, must be given as a dictionary") # Parse only_ligands if not isinstance(onlyLigandsValue, type(None)): if not isinstance(onlyLigandsValue, list): - raise ValueError('only_ligands must be a list.') - + raise ValueError("only_ligands must be a list.") + # Parse only_models if not isinstance(onlyModelsValue, type(None)): if not isinstance(onlyModelsValue, list): - raise ValueError('only_models must be a list.') + raise ValueError("only_models must be a list.") # Parse only_combinations if not isinstance(onlyCombinationsValue, type(None)): if not isinstance(onlyCombinationsValue, list): - raise ValueError('only_combinations must be a list.') + raise ValueError("only_combinations must be a list.") import prepare_proteins @@ -663,7 +678,7 @@ def peleAction(block: SlurmBlock): selections = block.variables.get("selections_list", []) if atom_pairs == {}: groups = [] - for model in models: + for model in models: atom_pairs[model] = {} for selection in selections: current_group = selection["group"] @@ -686,7 +701,7 @@ def peleAction(block: SlurmBlock): atom_pairs[model][ligandName] = [] atom_pairs[model][ligandName].append((protein_tuple, ligand_atom)) - cst_yaml = block.inputs.get("yaml_pele_file") + input_yaml = block.inputs.get("yaml_pele_file") cpus = block.variables.get("cpus", 48) peleFolderName = block.variables.get("pele_folder_name", "pele") @@ -694,7 +709,7 @@ def peleAction(block: SlurmBlock): jobs = models.setUpPELECalculation( peleFolderName, poses_folder, - cst_yaml, + input_yaml, box_radius=boxRadiusValue, iterations=peleIterationsValue, cpus=cpus, @@ -738,7 +753,7 @@ def peleAction(block: SlurmBlock): bias_to_point=biasToPointValue, com_bias1=comBias1Value, com_bias2=comBias2Value, - ligand_energy_groups=ligandEnergyGroupsValue + ligand_energy_groups=ligandEnergyGroupsValue, ) from utils import launchCalculationAction @@ -754,7 +769,7 @@ def peleAction(block: SlurmBlock): ) -def peleFinalAction(block: SlurmBlock):# +def peleFinalAction(block: SlurmBlock): # print("Pele finished") from utils import downloadResultsAction @@ -766,7 +781,6 @@ def peleFinalAction(block: SlurmBlock):# block.setOutput("pele_output_folder", peleFolderName) - from utils import BSC_JOB_VARIABLES blockVariables = BSC_JOB_VARIABLES + [ @@ -813,18 +827,20 @@ def peleFinalAction(block: SlurmBlock):# membraneResiduesVariable, biasToPointVariable, comBias1Variable, - comBias2Variable - + comBias2Variable, ] + def wrappedFunction(block: SlurmBlock): try: peleAction(block) except Exception as e: import traceback + print("Exception:", e) traceback.print_exc() + peleBlock = SlurmBlock( name="PELE", description="Run PELE", diff --git a/EAPM/Include/Blocks/PrepWizardEAPM.py b/EAPM/Include/Blocks/PrepWizardEAPM.py index 983973f..3fce5e6 100644 --- a/EAPM/Include/Blocks/PrepWizardEAPM.py +++ b/EAPM/Include/Blocks/PrepWizardEAPM.py @@ -2,9 +2,7 @@ Module containing the PrepWizard block for the EAPM plugin """ -import os - -from HorusAPI import PluginVariable, SlurmBlock, VariableTypes +from HorusAPI import PluginVariable, SlurmBlock, VariableGroup, VariableTypes # ==========================# # Variable inputs @@ -16,6 +14,26 @@ type=VariableTypes.FOLDER, defaultValue=None, ) +inputFilePW = PluginVariable( + name="Input File", + id="input_file", + description="File of the pdb to prepare.", + type=VariableTypes.FILE, + allowedValues=["pdb"], +) +folderVariableGroup = VariableGroup( + id="folder_variable_group", + name="Folder variable group", + description="Input folder with the models.", + variables=[inputFolderPW], +) +fileVariableGroup = VariableGroup( + id="file_output_variable_group", + name="PDB file group", + description="Input PDB file.", + variables=[inputFilePW], +) + # ==========================# # Variable outputs @@ -26,6 +44,13 @@ description="Folder containing the prepared proteins.", type=VariableTypes.FOLDER, ) +outputPDB = PluginVariable( + name="Output PDB", + id="out_pdb", + description="Last PDB of the Prepwizard.", + type=VariableTypes.FILE, + allowedValues=["pdb"], +) ############################## # Block's advanced variables # @@ -39,6 +64,7 @@ ) +# Variables phPW = PluginVariable( name="PH", id="ph", @@ -112,28 +138,41 @@ def prepWizardAction(block: SlurmBlock): Args: block (SlurmBlock): The block to run the action on. """ - # Loading plugin variables - inputFolder = block.inputs.get("input_folder", None) - if inputFolder is None: - raise Exception("No input folder provided.") + + import os + import time + + if block.selectedInputGroup == fileVariableGroup.id: + input_file = block.inputs.get(inputFilePW.id, None) + input_folder = "models" + if os.path.exists(input_folder): + input_folder = input_folder + "_" + str(time.time()) + os.makedirs(input_folder, exist_ok=True) + os.system(f"cp {input_file} {input_folder}") + elif block.selectedInputGroup == folderVariableGroup.id: + input_folder = block.inputs.get(inputFolderPW.id, None) + else: + raise Exception("No input selected") # Get prepWizard variables - folderName = block.variables.get("folder_name", "prepared_proteins") - ph = int(block.variables.get("ph", 7)) - epikPH = block.variables.get("epik_ph", False) - sampleWater = block.variables.get("sample_water", False) - removeHydrogens = block.variables.get("remove_hydrogens", False) - delWaterHbondCutOff = block.variables.get("del_water_hbond_cut_off", False) - fillLoops = block.variables.get("fill_loops", False) - protonationStates = block.variables.get("protonation_states", None) - noepik = block.variables.get("no_epik", False) - noProtAssign = block.variables.get("no_prot_assign", False) + folderName = block.variables.get(folderNameVariable.id, "prepared_proteins") + if os.path.exists(folderName): + folderName = folderName + "_" + str(time.time()) + ph = int(block.variables.get(phPW.id, 7)) + epikPH = block.variables.get(epikPHPW.id, False) + sampleWater = block.variables.get(sampleWaterPW.id, False) + removeHydrogens = block.variables.get(removeHydrogensPW.id, False) + delWaterHbondCutOff = block.variables.get(delWaterHbondCutOffPW.id, False) + fillLoops = block.variables.get(fillLoopsPW.id, False) + protonationStates = block.variables.get(protonationStatesPW.id, None) + noepik = block.variables.get(noepikPW.id, False) + noProtAssign = block.variables.get(noProtAssignPW.id, False) import prepare_proteins print("Loading pdbs files...") - models = prepare_proteins.proteinModels(inputFolder) + models = prepare_proteins.proteinModels(input_folder) print("Setting up PrepWizard Optimitzations...") @@ -173,11 +212,13 @@ def prepWizardAction(block: SlurmBlock): def downloadPrepWizardResults(block: SlurmBlock): + import os + from utils import downloadResultsAction downloadResultsAction(block) - folderName = block.variables.get("folder_name") + folderName = block.variables.get(folderNameVariable.id, "prepared_proteins") # Create the output folder containing the prepared proteins if not os.path.exists(folderName): @@ -193,7 +234,8 @@ def downloadPrepWizardResults(block: SlurmBlock): pdbPath = os.path.join(folderName + "_wizard", "output_models", model, file) shutil.copyfile(pdbPath, finalPath) - block.setOutput("prepared_proteins", folderName) + block.setOutput(outputPDB.id, finalPath) + block.setOutput(outputPW.id, folderName) from utils import BSC_JOB_VARIABLES @@ -218,6 +260,6 @@ def downloadPrepWizardResults(block: SlurmBlock): initialAction=prepWizardAction, finalAction=downloadPrepWizardResults, variables=block_variables, - inputs=[inputFolderPW], - outputs=[outputPW], + inputGroups=[folderVariableGroup, fileVariableGroup], + outputs=[outputPDB, outputPW], ) diff --git a/EAPM/Include/Blocks/Rbcavity.py b/EAPM/Include/Blocks/Rbcavity.py new file mode 100644 index 0000000..04b89ac --- /dev/null +++ b/EAPM/Include/Blocks/Rbcavity.py @@ -0,0 +1,93 @@ +""" +Module containing the rbcavity block for the EAPM plugin +""" + +from HorusAPI import PluginBlock, PluginVariable, VariableTypes + +# ==========================# +# Variable inputs +# ==========================# +inputPRMFile = PluginVariable( + name="Parameter file", + id="input_prm_file", + description="The input '.prm' file.", + type=VariableTypes.FILE, + defaultValue="parameter_file.prm", + allowedValues=["prm"], +) + +# ==========================# +# Variable outputs +# ==========================# +outputLog = PluginVariable( + name="Output log", + id="output_log", + description="The output log file.", + type=VariableTypes.FILE, + defaultValue="parameter_file.log", +) + + +############################## +# Other variables # +############################## +was = PluginVariable( + name="Was", + id="was", + description="Write docking cavities (plus distance grid) to .as file.", + type=VariableTypes.BOOLEAN, + defaultValue=True, +) +dumpInsight = PluginVariable( + name="Dump Insight", + id="dump_insight", + description="Dump InsightII/PyMOL grids for each cavity for visualisation.", + type=VariableTypes.BOOLEAN, + defaultValue=False, +) + + +# Align action block +def initialRbcavity(block: PluginBlock): + + if block.remote.name != "Local": + raise Exception("This block is only available for local execution.") + + # Loading plugin variables + input_PRMfile = block.inputs.get(inputPRMFile.id, None) + output_log = block.outputs.get(outputLog.id, "parameter_file.log") + + # rbcavity -was -d -r parameter_file.prm > parameter_file.log + command = "rbcavity " + if block.variables.get("was", True): + command += "-was " + if block.variables.get("dump_insight", False): + command += "-d " + command += f"-r {input_PRMfile} > {output_log}" + + print("Setting output of block to the results directory...") + + # subprocess the command + import subprocess + + completed_process = subprocess.run(command, shell=True, capture_output=True, text=True) + + # Get the output and error + output = completed_process.stdout + error = completed_process.stderr + + # Set the output + block.setOutput(outputLog.id, output_log) + + +rbCavityBlock = PluginBlock( + name="Rbcavity", + description="Calculate docking cavities. (For local)", + action=initialRbcavity, + variables=[ + was, + dumpInsight, + ], + inputs=[inputPRMFile], + outputs=[outputLog], +) diff --git a/EAPM/Include/Blocks/Rbdock.py b/EAPM/Include/Blocks/Rbdock.py new file mode 100644 index 0000000..38d0124 --- /dev/null +++ b/EAPM/Include/Blocks/Rbdock.py @@ -0,0 +1,128 @@ +""" +Module containing the rbdock block for the EAPM plugin +""" + +from HorusAPI import PluginBlock, PluginVariable, VariableTypes + +# ==========================# +# Variable inputs +# ==========================# +inputPRMFile = PluginVariable( + name="Parameter file", + id="input_prm_file", + description="The input '.prm' file.", + type=VariableTypes.FILE, + defaultValue="parameter_file.prm", + allowedValues=["prm"], +) +inputLigand = PluginVariable( + name="Ligand SD file", + id="input_ligand", + description="The input ligand SD file.", + type=VariableTypes.FILE, + allowedValues=["sd"], +) + +# ==========================# +# Variable outputs +# ==========================# +outputFile = PluginVariable( + name="Output File", + id="output_file", + description="The output file with the ligand docked.", + type=VariableTypes.FILE, + defaultValue="parameter_file", +) + + +############################## +# Other variables # +############################## +protoPrmFile = PluginVariable( + name="proto Prm File", + id="proto_prm_file", + description="The docking protocol parameter file.", + type=VariableTypes.FILE, + defaultValue="dock.prm", +) +nRuns = PluginVariable( + name="nRuns", + id="n_runs", + description="Number of runs/ligand (default=1).", + type=VariableTypes.INTEGER, + defaultValue=None, +) +allH = PluginVariable( + name="allH", + id="all_h", + description="Keep all hydrogens, read all hydrogens present (default=polar hydrogens only).", + type=VariableTypes.BOOLEAN, + defaultValue=False, +) + + +# Align action block +def initialRbdock(block: PluginBlock): + + import os + + if block.remote.name != "Local": + raise Exception("This block is only available for local execution.") + + # Loading plugin variables + input_PRMfile = block.inputs.get(inputPRMFile.id, None) + if input_PRMfile is None: + raise Exception("No parameter file provided.") + if not os.path.exists(input_PRMfile): + raise Exception("Parameter file does not exist.") + input_ligand = block.inputs.get(inputLigand.id, None) + out = "output_dock" + if input_ligand is None: + raise Exception("No ligand file provided.") + if not os.path.exists(input_ligand): + raise Exception("Ligand file does not exist.") + else: + out = os.path.basename(input_ligand).split(".")[0] + "_out" + output_file = block.outputs.get(outputFile.id, out) + + # rbcavity -was -d -r parameter_file.prm > parameter_file.log + command = f"rbdock -i {input_ligand} -o {output_file} -r {input_PRMfile} " + if block.variables.get("proto_prm_file", None) is not None: + command += f"-p {block.variables.get('proto_prm_file')} " + if block.variables.get("n_runs", None) is not None: + command += f"-n {block.variables.get('n_runs')} " + if block.variables.get("all_h", False): + command += "-allH " + + print("Setting output of block to the results directory...") + + # Subprocess the command + import subprocess + + completed_process = subprocess.run(command, shell=True, capture_output=True, text=True) + + # Get the output and error + output = completed_process.stdout + # Save the output and error + with open(f"{output_file}.out", "w") as f: + f.write(output) + error = completed_process.stderr + with open(f"{output_file}.err", "w") as f: + f.write(error) + + # Set the output + block.setOutput(outputFile.id, output_file) + + +rbDockBlock = PluginBlock( + name="Rbdock", + description="Calculate the docking. (For local)", + action=initialRbdock, + variables=[ + protoPrmFile, + nRuns, + allH, + ], + inputs=[inputPRMFile, inputLigand], + outputs=[outputFile], +) diff --git a/EAPM/Include/Blocks/SetupDockingGrid.py b/EAPM/Include/Blocks/SetupDockingGrid.py index dbdb4c4..73b3898 100644 --- a/EAPM/Include/Blocks/SetupDockingGrid.py +++ b/EAPM/Include/Blocks/SetupDockingGrid.py @@ -1,6 +1,4 @@ -import os - -from HorusAPI import InputBlock, PluginVariable, SlurmBlock, VariableGroup, VariableTypes +from HorusAPI import PluginVariable, SlurmBlock, VariableGroup, VariableTypes # Input variables modelFolderVariable = PluginVariable( @@ -45,6 +43,8 @@ # Action def glideDocking(block: SlurmBlock): + import os + import prepare_proteins models_folder = block.inputs.get("model_folder") @@ -146,7 +146,7 @@ def glideDocking(block: SlurmBlock): from utils import launchCalculationAction - launchCalculationAction(block, jobs, "glide", ["grid"]) + launchCalculationAction(block, jobs, "schrodinger", ["grid"]) def downloadGridResults(block: SlurmBlock): diff --git a/EAPM/Include/Blocks/SetupGlide.py b/EAPM/Include/Blocks/SetupGlide.py index b6153fd..908875e 100644 --- a/EAPM/Include/Blocks/SetupGlide.py +++ b/EAPM/Include/Blocks/SetupGlide.py @@ -1,6 +1,3 @@ -import os -import shutil - from HorusAPI import PluginVariable, SlurmBlock, VariableGroup, VariableTypes # Input variables @@ -54,6 +51,9 @@ def setupGlideDocking(block: SlurmBlock): + import os + import shutil + import prepare_proteins if block.selectedInputGroup == "folder_input_group": @@ -133,11 +133,13 @@ def setupGlideDocking(block: SlurmBlock): from utils import launchCalculationAction launchCalculationAction( - block, jobs, "glide", uploadFolders=["docking", "grid", relative_ligand_folder] + block, jobs, "schrodinger", uploadFolders=["docking", "grid", relative_ligand_folder] ) def downloadGlideDocking(block: SlurmBlock): + import os + from utils import downloadResultsAction downloadResultsAction(block) diff --git a/EAPM/Include/Blocks/TrimAlphafoldModels.py b/EAPM/Include/Blocks/TrimAlphafoldModels.py index 57df227..a24eae0 100644 --- a/EAPM/Include/Blocks/TrimAlphafoldModels.py +++ b/EAPM/Include/Blocks/TrimAlphafoldModels.py @@ -1,6 +1,3 @@ -import os -import shutil - from HorusAPI import PluginBlock, PluginVariable, VariableTypes resultsFolderAF = PluginVariable( @@ -37,6 +34,8 @@ def trimAlphaFoldModels(block: PluginBlock): + import os + import shutil # Get the models folder models_folder = block.inputs.get("results_folder", None) @@ -81,7 +80,7 @@ def trimAlphaFoldModels(block: PluginBlock): trimmed_folder = os.path.join(os.getcwd(), "trimmed_models") # Set the output - block.setOutput("trimmed_models", trimmed_folder) + block.setOutput(trimmedModelsOutputAF.id, trimmed_folder) outPdb = None for file in os.listdir(trimmed_folder): @@ -89,7 +88,7 @@ def trimAlphaFoldModels(block: PluginBlock): outPdb = os.path.join(trimmed_folder, file) break - block.setOutput("out_pdb", outPdb) + block.setOutput(outputPDBAF.id, outPdb) trimAlphaFoldModelsBlock = PluginBlock( diff --git a/EAPM/Include/Blocks/analyseGlideFelip.py b/EAPM/Include/Blocks/analyseGlideFelip.py new file mode 100644 index 0000000..2f5994b --- /dev/null +++ b/EAPM/Include/Blocks/analyseGlideFelip.py @@ -0,0 +1,104 @@ +import json +import os +import shutil + +import bsc_calculations +import pandas as pd +import prepare_proteins + +from HorusAPI import PluginBlock, PluginVariable, VariableTypes + +# ==========================# +# Variable inputs +# ==========================# +fasta_fileAF = PluginVariable( + name="Fasta file", + id="fasta_file", + description="The input fasta file.", + type=VariableTypes.FILE, + defaultValue=None, + allowedValues=["fasta"], +) + +# Output variables +outputModelsVariable = PluginVariable( + id="models", + name="Alphafold models", + description="The output models", + type=VariableTypes.FOLDER, +) + + +def finalAction(block: PluginBlock): + + models = prepare_proteins.proteinModels("models") + + with open("lig_atom_name.json", "r") as f: + lig_atom_name = json.load(f) + + triads = {} + triads["FeLip9"] = [111, 190, 167] + + triad_atoms = {} + triad_atoms["ser_OG"] = {} + triad_atoms["his_ND1"] = {} + triad_atoms["his_NE2"] = {} + triad_atoms["asp_OD1"] = {} + triad_atoms["asp_OD2"] = {} + + for model in models: # Iterate the models inside the library + S = triads[model][0] + H = triads[model][1] + D = triads[model][2] + for r in models.structures[ + model + ].get_residues(): # Iterate the residues for each Bio.PDB.Structure object + if r.id[1] == S: # Check that the residue matches the defined index + assert ( + r.resname == "SER" + ) # Assert that the residue has the correct residue identity + triad_atoms["ser_OG"][model] = ( + r.get_parent().id, + r.id[1], + "OG", + ) # Store the corresponsing tuple. + elif r.id[1] == H: + assert r.resname == "HIS" + triad_atoms["his_ND1"][model] = (r.get_parent().id, r.id[1], "ND1") + triad_atoms["his_NE2"][model] = (r.get_parent().id, r.id[1], "NE2") + elif r.id[1] == D: + assert r.resname == "ASP" + triad_atoms["asp_OD1"][model] = (r.get_parent().id, r.id[1], "OD1") + triad_atoms["asp_OD2"][model] = (r.get_parent().id, r.id[1], "OD2") + + atom_pairs = {} # Define the dictionary containing the atom pairs for each model + for model in models: + atom_pairs[model] = {} + atom_pairs[model]["PET"] = [] + atom_pairs[model]["PET"].append((triad_atoms["ser_OG"][model], lig_atom_name[2]["C1"])) + atom_pairs[model]["PET"].append((triad_atoms["ser_OG"][model], lig_atom_name[2]["C8"])) + atom_pairs[model]["PET"].append((triad_atoms["ser_OG"][model], lig_atom_name[4]["C1"])) + atom_pairs[model]["PET"].append((triad_atoms["ser_OG"][model], lig_atom_name[4]["C8"])) + atom_pairs[model]["PET"].append((triad_atoms["ser_OG"][model], lig_atom_name[6]["C1"])) + atom_pairs[model]["PET"].append((triad_atoms["ser_OG"][model], lig_atom_name[6]["C8"])) + atom_pairs[model]["PET"].append((triad_atoms["ser_OG"][model], lig_atom_name[8]["C1"])) + atom_pairs[model]["PET"].append((triad_atoms["ser_OG"][model], lig_atom_name[8]["C8"])) + + models.analyseDocking("docking", atom_pairs=atom_pairs) + + metric_distances = {} # Define the global dictionary + metric_distances["OG_C"] = {} # Define the metric nested dictionary + + for model in models: + metric_distances["OG_C"][model] = {} # Define the model nested dictionary + for ligand in models.docking_ligands[model]: + # Define the ligand nested dictionary with all the docking distances list + metric_distances["OG_C"][model][ligand] = models.getDockingDistances(model, ligand) + + models.combineDockingDistancesIntoMetrics(metric_distances) + + best_poses = models.getBestDockingPosesIteratively(metric_distances) + + models.extractDockingPoses(best_poses, "docking", "best_docking_poses") + + block.setOutput(outputModelsVariable.id, "best_docking_poses") diff --git a/EAPM/Include/Blocks/testBlock.py b/EAPM/Include/Blocks/testBlock.py new file mode 100644 index 0000000..ded4d28 --- /dev/null +++ b/EAPM/Include/Blocks/testBlock.py @@ -0,0 +1,74 @@ +from HorusAPI import PluginBlock, PluginVariable, VariableTypes + +# ==========================# +# Variable inputs +# ==========================# +fasta_fileAF = PluginVariable( + name="Fasta file", + id="fasta_file", + description="The input fasta file.", + type=VariableTypes.FILE, + defaultValue=None, + allowedValues=["fasta"], +) + +# ==========================# +# Variables +# ==========================# +outputAF = PluginVariable( + name="Alphafold simulation folder", + id="folder_name", + description="The name of the folder where the simulation will be stored.", + type=VariableTypes.STRING, + defaultValue="alphafold", +) +removeExistingResults = PluginVariable( + name="Remove existing results", + id="remove_existing_results", + description="Remove existing results", + type=VariableTypes.BOOLEAN, + defaultValue=False, +) + +# Output variables +outputModelsVariable = PluginVariable( + id="models", + name="Alphafold models", + description="The output models", + type=VariableTypes.FOLDER, +) + + +def finalAlhafoldAction(block: PluginBlock): + import os + import shutil + + resultsFolder = "alphafold" + downloaded_path = "/home/perry/data/acanella/testHorus/all_test/alphafold" + + output_models_folder = os.path.join(downloaded_path, resultsFolder, "output_models") + + if not os.path.exists("structures"): + os.makedirs("structures") + + rank = 0 + for model in os.listdir(output_models_folder): + if os.path.exists(f"{output_models_folder}/" + model + "/ranked_" + str(rank) + ".pdb"): + shutil.copyfile( + f"{output_models_folder}/" + model + "/ranked_" + str(rank) + ".pdb", + "structures/" + model + ".pdb", + ) + + block.setOutput(outputModelsVariable.id, "structures") + + +from utils import BSC_JOB_VARIABLES + +testBlock = PluginBlock( + name="Test block", + description="Test", + action=finalAlhafoldAction, + variables=BSC_JOB_VARIABLES + [outputAF, removeExistingResults], + inputs=[fasta_fileAF], + outputs=[outputModelsVariable], +) diff --git a/EAPM/Include/Configs/hmmerConfig.py b/EAPM/Include/Configs/hmmerConfig.py index 42e6858..451b6e2 100644 --- a/EAPM/Include/Configs/hmmerConfig.py +++ b/EAPM/Include/Configs/hmmerConfig.py @@ -15,10 +15,10 @@ def checkHmmerInstallation(block: PluginConfig): print("verifying HMMER installation") # Get the path to the mafft executable - hmmerPath = block.variables.get("HMMER_path") + hmmerPath = block.variables.get(hmmerPathVariable.id) # Check if the path is valid - if not os.path.isfile(hmmerPath): + if not os.path.isdir(hmmerPath): raise Exception("The HMMER executable path is not valid") diff --git a/EAPM/Include/Configs/mafftConfig.py b/EAPM/Include/Configs/mafftConfig.py index 17cb210..c27eda4 100644 --- a/EAPM/Include/Configs/mafftConfig.py +++ b/EAPM/Include/Configs/mafftConfig.py @@ -15,7 +15,7 @@ def checkMAFFTInstallation(block: PluginConfig): print("verifying MAFFT installation") # Get the path to the mafft executable - mafftPath = block.variables.get("MAFFT_path") + mafftPath = block.variables.get(mafftPathVariable.id) # Check if the path is valid if not os.path.isfile(mafftPath): diff --git a/EAPM/Include/utils.py b/EAPM/Include/utils.py index 8f8a92e..87dc0ac 100644 --- a/EAPM/Include/utils.py +++ b/EAPM/Include/utils.py @@ -19,6 +19,7 @@ def setup_bsc_calculations_based_on_horus_remote( job_name, program, modulePurge, + cpus_per_task, ): import bsc_calculations @@ -32,7 +33,13 @@ def setup_bsc_calculations_based_on_horus_remote( # If we are working with pele, only marenostrum and nord3 are allowed if program == "pele": - if cluster not in ["mn1.bsc.es", "mn2.bsc.es", "mn3.bsc.es", "nord3.bsc.es"]: + if cluster not in [ + "glogin1.bsc.es", + "glogin2.bsc.es", + "glogin3.bsc.es", + "glogin4.bsc.es", + "nord3.bsc.es", + ]: raise Exception("Pele can only be run on Marenostrum or Nord3") if cluster == "nord3.bsc.es": @@ -43,8 +50,8 @@ def setup_bsc_calculations_based_on_horus_remote( general_script=scriptName, scripts_folder=scriptName + "_scripts", ) - elif "mn" in cluster: - bsc_calculations.marenostrum.setUpPELEForMarenostrum( + elif "glogin" in cluster: + bsc_calculations.mn5.setUpPELEForMarenostrum( jobs, partition=partition, cpus=cpus, @@ -56,26 +63,27 @@ def setup_bsc_calculations_based_on_horus_remote( ## Define cluster # cte_power - if cluster == "plogin1.bsc.es": - bsc_calculations.cte_power.jobArrays( - jobs, - job_name=job_name, - partition=partition, - program=program, - script_name=scriptName, - gpus=cpus, - module_purge=modulePurge, - ) + # if cluster == "plogin1.bsc.es": + # bsc_calculations.cte_power.jobArrays( + # jobs, + # job_name=job_name, + # partition=partition, + # program=program, + # script_name=scriptName, + # gpus=cpus, + # module_purge=modulePurge, + # ) # marenostrum - elif "mn" in cluster: + elif "glogin" in cluster or "alogin" in cluster: print("Generating Marenostrum jobs...") - bsc_calculations.marenostrum.jobArrays( + bsc_calculations.mn5.jobArrays( jobs, job_name=job_name, partition=partition, program=program, script_name=scriptName, - cpus=cpus, + ntasks=cpus, + cpus_per_task=cpus_per_task, module_purge=modulePurge, ) # minotauro @@ -91,7 +99,7 @@ def setup_bsc_calculations_based_on_horus_remote( module_purge=modulePurge, ) # nord3 - elif cluster == "nord3.bsc.es": + elif "nord" in cluster: print("Generating nord3 jobs...") bsc_calculations.nord3.jobArrays( jobs, @@ -102,6 +110,18 @@ def setup_bsc_calculations_based_on_horus_remote( cpus=cpus, module_purge=modulePurge, ) + # cte-amd + elif "amdlogin" in cluster: + print("Generating cte-amd jobs...") + bsc_calculations.amd.jobArrays( + jobs, + job_name=job_name, + partition=partition, + program=program, + script_name=scriptName, + cpus=cpus, + # module_purge=modulePurge, + ) # powerpuff elif cluster == "powerpuff": print("Generating powerpuff girls jobs...") @@ -164,6 +184,7 @@ def launchCalculationAction( partition = block.variables.get("partition") cpus = block.variables.get("cpus") + cpus_per_task = block.variables.get("cpus_per_task") simulationName = block.variables.get("folder_name") scriptName = block.variables.get("script_name", "calculation_script.sh") @@ -184,6 +205,7 @@ def launchCalculationAction( simulationName, program, modulePurge, + cpus_per_task, ) # Read the environment variables @@ -406,8 +428,8 @@ def downloadResultsAction(block: SlurmBlock): id="partition", description="Partition where to lunch.", type=VariableTypes.STRING_LIST, - defaultValue="bsc_ls", - allowedValues=["bsc_ls", "debug"], + defaultValue="gp_bscls", + allowedValues=["gp_bscls", "gp_debug", "acc_bscls", "acc_debug", "debug", "bsc_ls"], category="Slurm configuration", ) @@ -420,6 +442,15 @@ def downloadResultsAction(block: SlurmBlock): category="Slurm configuration", ) +cpusPerTaskVariable = PluginVariable( + name="CPUs per task", + id="cpus_per_task", + description="Number of CPUs per task to use.", + type=VariableTypes.INTEGER, + defaultValue=1, + category="Slurm configuration", +) + removeFolderOnFinishVariable = PluginVariable( name="Remove remote folder on finish", id="remove_folder_on_finish", @@ -461,4 +492,5 @@ def downloadResultsAction(block: SlurmBlock): cpusVariable, environmentList, removeFolderOnFinishVariable, + cpusPerTaskVariable, ] diff --git a/EAPM/config/eapm.json b/EAPM/config/eapm.json index fd03e90..f9593d4 100644 --- a/EAPM/config/eapm.json +++ b/EAPM/config/eapm.json @@ -1,4 +1,4 @@ { - "mafft_path": "/home/albertcs/miniconda3/bin/mafft", - "hmmer_path": "/gpfs/projects/bsc72/conda_envs/hmm/bin/hmmsearch" + "mafft_path": "/home/perry/miniconda3/envs/horus/bin/mafft", + "hmmer_path": "/home/perry/miniconda3/envs/horus/bin" } \ No newline at end of file diff --git a/EAPM/config/eapm_Local.json b/EAPM/config/eapm_Local.json new file mode 100644 index 0000000..298a495 --- /dev/null +++ b/EAPM/config/eapm_Local.json @@ -0,0 +1,4 @@ +{ + "mafft_path": "MAFFT", + "hmmer_path": "HMMER" +} \ No newline at end of file diff --git a/EAPM/config/eapm_acc.json b/EAPM/config/eapm_acc.json new file mode 100644 index 0000000..298a495 --- /dev/null +++ b/EAPM/config/eapm_acc.json @@ -0,0 +1,4 @@ +{ + "mafft_path": "MAFFT", + "hmmer_path": "HMMER" +} \ No newline at end of file diff --git a/EAPM/config/eapm_nord3_test.json b/EAPM/config/eapm_nord3_test.json new file mode 100644 index 0000000..298a495 --- /dev/null +++ b/EAPM/config/eapm_nord3_test.json @@ -0,0 +1,4 @@ +{ + "mafft_path": "MAFFT", + "hmmer_path": "HMMER" +} \ No newline at end of file diff --git a/EAPM/plugin.meta b/EAPM/plugin.meta index 62dacb3..737f1dd 100644 --- a/EAPM/plugin.meta +++ b/EAPM/plugin.meta @@ -6,10 +6,10 @@ "pluginFile": "EAPM.py", "dependencies": [ "pyhmmer", + "numpy", "pandas", "scipy", "pyyaml", - "numpy", "matplotlib", "seaborn", "mdtraj", @@ -21,6 +21,20 @@ "git+https://github.com/Martin-Floor/bsc_calculations.git", "git+https://github.com/Martin-Floor/prepare_proteins.git", "git+https://github.com/Martin-Floor/PELE_scripts.git", - "biopython==1.81" + "biopython==1.81", + "scikit_learn", + "biopython", + "shap", + "matplotlib", + "openpyxl", + "pyod", + "combo", + "mlflow", + "optuna", + "transformers", + "optuna_integration", + "lightning", + "peft", + "datasets" ] } diff --git a/EAPM/preinst.sh b/EAPM/preinst.sh new file mode 100644 index 0000000..f33cc76 --- /dev/null +++ b/EAPM/preinst.sh @@ -0,0 +1,3 @@ +# pip install "pycaret[analysis, models]" --target deps + +# pip install "werkzeug<=2.3.0"