Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: nosubmit runs with default cluster config #34

Merged
merged 9 commits into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,6 @@ Snakemake

# temporary test output
examples/CyclophilinD_data/abfe/
examples/abfe_out*
examples/abfe_*out*
examples/output
/src/abfe/_version.py
34 changes: 34 additions & 0 deletions environment_dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: abfe
channels:
- bioconda
- openbiosim
- conda-forge
- defaults
dependencies:
- python>=3.8
- pip
- conda-build

- snakemake=7.8.5
- tabulate=0.8.10

- gromacs>=2022.2

- matplotlib
- numpy=1.22.4
- scipy=1.7.3
- pandas

- mdanalysis
- parmed
- pdbfixer
- biosimspace
- openff-toolkit
- openff-interchange
- black

- pip:
- alchemlyb==2.0.0
- pymbar==4.0.1
- MDRestraintsGenerator

82 changes: 56 additions & 26 deletions src/abfe/calculate_abfe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,33 @@
from abfe.scripts import final_receptor_results


def calculate_abfe(protein_pdb_path: str, ligand_sdf_paths: List[str], out_root_folder_path: str,
approach_name: str = "", cofactor_sdf_path: str = None,
n_cores_per_job: int = 8, num_jobs_receptor_workflow: int = None, num_jobs_per_ligand: int = 40, num_replicas: int = 3, small_mol_ff="openff",
submit: bool = False, use_gpu: bool = True, hybrid_job: bool = True, cluster_config: dict = {}):
def calculate_abfe(
protein_pdb_path: str,
ligand_sdf_paths: List[str],
out_root_folder_path: str,
approach_name: str = "",
cofactor_sdf_path: str = None,
n_cores_per_job: int = 8,
num_jobs_receptor_workflow: int = None,
num_jobs_per_ligand: int = 40,
num_replicas: int = 3,
small_mol_ff="openff",
submit: bool = False,
use_gpu: bool = True,
hybrid_job: bool = True,
cluster_config: dict = {},
):
orig_dir = os.getcwd()
conf = {}

# IO:
## Input standardization
conf["input_protein_pdb_path"] = os.path.abspath(protein_pdb_path)
conf["input_ligands_sdf_path"] = [os.path.abspath(ligand_sdf_path) for ligand_sdf_path in ligand_sdf_paths]
conf["input_ligands_sdf_path"] = [
os.path.abspath(ligand_sdf_path) for ligand_sdf_path in ligand_sdf_paths
]

if (cofactor_sdf_path is not None):
if cofactor_sdf_path is not None:
conf["input_cofactor_sdf_path"] = os.path.abspath(cofactor_sdf_path)
else:
conf["input_cofactor_sdf_path"] = None
Expand All @@ -28,47 +42,63 @@ def calculate_abfe(protein_pdb_path: str, ligand_sdf_paths: List[str], out_root_

## Generate output folders
for dir_path in [conf["out_approach_path"]]:
if (not os.path.isdir(dir_path)):
if not os.path.isdir(dir_path):
os.mkdir(dir_path)

# Prepare Input / Parametrize
os.chdir(conf["out_approach_path"])

conf["ligand_names"] = [os.path.splitext(os.path.basename(sdf))[0] for sdf in conf["input_ligands_sdf_path"]]
conf["num_jobs"] = num_jobs_receptor_workflow if (num_jobs_receptor_workflow is not None) else len(conf["ligand_names"]) * num_replicas * 2
conf["ligand_names"] = [
os.path.splitext(os.path.basename(sdf))[0]
for sdf in conf["input_ligands_sdf_path"]
]
conf["num_jobs"] = (
num_jobs_receptor_workflow
if (num_jobs_receptor_workflow is not None)
else len(conf["ligand_names"]) * num_replicas * 2
)
conf["num_replica"] = num_replicas
conf['build_system'] = True
conf["build_system"] = True
conf["small_mol_ff"] = small_mol_ff

print("Prepare")
print("\tstarting preparing ABFE-ligand file structur")
build_ligand_flows(input_ligand_paths=conf["input_ligands_sdf_path"],
input_protein_path=conf["input_protein_pdb_path"],
input_cofactor_path=conf["input_cofactor_sdf_path"],
out_root_path=conf["out_approach_path"],
num_max_thread=n_cores_per_job,
num_replicas=num_replicas, num_jobs=num_jobs_per_ligand,
cluster_config=cluster_config,
use_gpu=use_gpu, hybrid_job=hybrid_job)
build_ligand_flows(
input_ligand_paths=conf["input_ligands_sdf_path"],
input_protein_path=conf["input_protein_pdb_path"],
input_cofactor_path=conf["input_cofactor_sdf_path"],
out_root_path=conf["out_approach_path"],
num_max_thread=n_cores_per_job,
num_replicas=num_replicas,
num_jobs=num_jobs_per_ligand,
cluster_config=cluster_config,
use_gpu=use_gpu,
hybrid_job=hybrid_job,
)

print("\tstarting preparing ABFE-Approach file structure: ", out_root_folder_path)
expected_out_paths = int(num_replicas) * len(conf["ligand_names"])
result_paths = glob.glob(conf["out_approach_path"] + "/*/*/dG*tsv")

job_approach_file_path= None
if (len(result_paths) != expected_out_paths):
job_approach_file_path = None
if len(result_paths) != expected_out_paths:
print("\tBuild approach struct")
job_approach_file_path = build_approach_flow(approach_name=approach_name,
num_jobs=conf["num_jobs"],
conf=conf, submit=submit,
cluster_config=cluster_config)
job_approach_file_path = build_approach_flow(
approach_name=approach_name,
num_jobs=conf["num_jobs"],
conf=conf,
submit=submit,
cluster_config=cluster_config,
)
print("Do")
print("\tSubmit Job - ID: ", job_approach_file_path)
# Final gathering
print("\tAlready got results?: " + str(len(result_paths)))
if (len(result_paths) > 0):
if len(result_paths) > 0:
print("Trying to gather ready results", out_root_folder_path)
final_receptor_results.get_final_results(out_dir=out_root_folder_path, in_root_dir=out_root_folder_path)
final_receptor_results.get_final_results(
out_dir=out_root_folder_path, in_root_dir=out_root_folder_path
)

print()
os.chdir(orig_dir)
82 changes: 56 additions & 26 deletions src/abfe/calculate_abfe_gmx.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,76 +10,106 @@

log = logging.getLogger(__file__)
log.setLevel(logging.INFO)


def calculate_abfe_gmx(
    input_dir: str,
    out_root_folder_path: str,
    approach_name: str = "",
    n_cores_per_job: int = 8,
    num_jobs_receptor_workflow: int = None,
    num_jobs_per_ligand: int = 40,
    num_replicas: int = 3,
    submit: bool = False,
    use_gpu: bool = True,
    hybrid_job: bool = True,
    cluster_config: dict = None,
):
    """Set up an ABFE run from a directory of prepared per-ligand inputs.

    Every sub-directory of ``input_dir`` is treated as one ligand: it is
    copied to ``<out_root_folder_path>/<ligand>/input`` (unless that copy
    already exists) and a replica simulation flow is built for it. After
    that the approach-level flow is built, unless the expected number of
    ``dG*tsv`` result files is already present, and any results found are
    handed to ``final_receptor_results.get_final_results``.

    Args:
        input_dir: Directory whose sub-directories are the ligand inputs.
        out_root_folder_path: Output root; created if it does not exist.
        approach_name: Forwarded to ``build_approach_flow``.
        n_cores_per_job: Forwarded as ``num_max_thread`` to
            ``build_replicas_simulation_flow``.
        num_jobs_receptor_workflow: Job count for the approach flow. When
            ``None`` it defaults to ``n_ligands * num_replicas * 2``.
        num_jobs_per_ligand: Forwarded as ``num_jobs`` to
            ``build_replicas_simulation_flow``.
        num_replicas: Replica count per ligand; also used to compute the
            number of expected result files.
        submit: Forwarded to ``build_approach_flow``. The per-ligand flows
            are always built with ``submit=False``.
        use_gpu: Forwarded to ``build_replicas_simulation_flow``.
        hybrid_job: Forwarded to ``build_replicas_simulation_flow``.
        cluster_config: Cluster settings forwarded to both flow builders.
            ``None`` (the default) is replaced by a fresh empty dict.

    Returns:
        None. The function works through side effects (files, prints, logs).
    """
    # Use a fresh dict per call; a shared ``{}`` default would leak state
    # between calls if one of the builders mutates it.
    if cluster_config is None:
        cluster_config = {}

    orig_dir = os.getcwd()
    conf = {}

    # IO:
    ## Input standardization
    conf["input_path"] = os.path.abspath(input_dir)
    conf["build_system"] = False
    conf["out_approach_path"] = os.path.abspath(out_root_folder_path)
    conf["small_mol_ff"] = "custom"

    ## Generate output folders
    os.makedirs(conf["out_approach_path"], exist_ok=True)

    # Prepare Input / Parametrize
    os.chdir(conf["out_approach_path"])
    try:
        # get Ligands: only sub-directories of the input path are ligands.
        ligand_dirs = [
            os.path.join(conf["input_path"], entry)
            for entry in os.listdir(conf["input_path"])
            if os.path.isdir(os.path.join(conf["input_path"], entry))
        ]

        # Count ligand directories rather than every entry of the input
        # folder, so stray files do not inflate the default job count.
        conf["num_jobs"] = (
            num_jobs_receptor_workflow
            if num_jobs_receptor_workflow is not None
            else len(ligand_dirs) * num_replicas * 2
        )
        conf["num_replica"] = num_replicas

        print("Found ligands: ")
        log.info("Found ligands:")
        conf["ligand_names"] = []
        for ligand_dir in ligand_dirs:
            ligand_name = os.path.basename(ligand_dir)
            print("\t", ligand_name)
            log.info("\t%s", ligand_name)

            new_lig_dir = os.path.join(conf["out_approach_path"], ligand_name)
            lig_input_dir = os.path.join(new_lig_dir, "input")

            if not os.path.exists(new_lig_dir):
                os.mkdir(new_lig_dir)

            # Keep an existing copy so a re-run does not overwrite inputs.
            if not os.path.exists(lig_input_dir):
                shutil.copytree(ligand_dir, lig_input_dir)

            conf["ligand_names"].append(ligand_name)
            build_replicas_simulation_flow(
                out_ligand_path=new_lig_dir,
                input_ligand_path=lig_input_dir,
                ligand_name=ligand_name,
                num_max_thread=n_cores_per_job,
                num_replicas=num_replicas,
                cluster_config=cluster_config,
                submit=False,
                num_jobs=num_jobs_per_ligand,
                use_gpu=use_gpu,
                hybrid_job=hybrid_job,
            )

        # The path is passed as a lazy %-argument; without the placeholder
        # the logging module fails to format the record.
        log.info(
            "\tstarting preparing ABFE-Approach file structure: %s",
            out_root_folder_path,
        )
        expected_out_paths = int(num_replicas) * len(conf["ligand_names"])
        result_paths = glob.glob(conf["out_approach_path"] + "/*/*/dG*tsv")

        # Initialised up front so the report below never reads an unbound
        # name when all results already exist and nothing is built.
        job_approach_file_path = None
        if len(result_paths) != expected_out_paths:
            print("\tBuild approach struct")
            job_approach_file_path = build_approach_flow(
                approach_name=approach_name,
                num_jobs=conf["num_jobs"],
                conf=conf,
                submit=submit,
                cluster_config=cluster_config,
            )
        print("Do")
        print("\tSubmit Job - ID: ", job_approach_file_path)

        # Final gathering
        print("\tAlready got results?: " + str(len(result_paths)))
        if len(result_paths) > 0:
            print("Trying to gather ready results", out_root_folder_path)
            # Use the absolute output path: the working directory is the
            # output folder here, so a relative path would resolve wrongly.
            final_receptor_results.get_final_results(
                out_dir=conf["out_approach_path"],
                in_root_dir=conf["out_approach_path"],
            )

        print()
    finally:
        # Always return to the caller's directory, even on failure.
        os.chdir(orig_dir)
9 changes: 4 additions & 5 deletions src/abfe/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@


# Default settings: shell scripts located under
# ``template.gmx_submit_kernels_path`` for CPU/GPU jobs, plus an additional
# flag string for gmx that is empty by default.
std_conf = dict(
    # Gromacs kernels
    gmx_kernel_cpu=template.gmx_submit_kernels_path + "/def_cpu_job.sh",
    gmx_kernel_cpu_cont=template.gmx_submit_kernels_path + "/def_cpu_job_cont.sh",
    gmx_kernel_gpu=template.gmx_submit_kernels_path + "/def_gpu_job.sh",
    gmx_kernel_gpu_cont=template.gmx_submit_kernels_path + "/def_gpu_job_cont.sh",
    # GMX flag addition
    gmx_add_flag="",
)
Loading