Skip to content
This repository has been archived by the owner on Oct 19, 2023. It is now read-only.

Commit

Permalink
REINVENT v3.2
Browse files Browse the repository at this point in the history
  • Loading branch information
GuoJeff committed Jun 1, 2022
1 parent e2463b8 commit b7324d2
Show file tree
Hide file tree
Showing 102 changed files with 1,973 additions and 835 deletions.
34 changes: 34 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Image for running the REINVENT command line interface.
FROM docker.io/continuumio/conda-ci-linux-64-python3.7:latest

# Package installation and writes below /opt and /etc need root.
USER root

# Build Lua 5.1.4.9 and Lmod 8.2 from source and register the Lmod profile script.
# Every step is chained with "&&" (never ";") so that a failing download,
# build or "cd" aborts the image build instead of being silently ignored and
# letting later commands run in the wrong directory.
RUN apt-get update && \
    apt-get -y install rsync procps && \
    wget https://sourceforge.net/projects/lmod/files/lua-5.1.4.9.tar.bz2 && \
    tar xf lua-5.1.4.9.tar.bz2 && \
    cd lua-5.1.4.9 && \
    ./configure --prefix=/opt/apps/lua/5.1.4.9 && \
    make && \
    make install && \
    cd /opt/apps/lua && \
    ln -s 5.1.4.9 lua && \
    ln -s /opt/apps/lua/lua/bin/lua /usr/local/bin && \
    ln -s /opt/apps/lua/lua/bin/luac /usr/local/bin && \
    cd && \
    wget https://sourceforge.net/projects/lmod/files/Lmod-8.2.tar.bz2 && \
    tar xf Lmod-8.2.tar.bz2 && \
    cd Lmod-8.2 && \
    ./configure --prefix=/opt/apps --with-fastTCLInterp=no && \
    make install && \
    ln -s /opt/apps/lmod/lmod/init/profile /etc/profile.d/z00_lmod.sh

# Lmod locations matching the --prefix=/opt/apps install above.
ENV LMOD_ROOT=/opt/apps/lmod \
    LMOD_PKG=/opt/apps/lmod/lmod \
    LMOD_VERSION=8.2 \
    LMOD_CMD=/opt/apps/lmod/lmod/libexec/lmod \
    LMOD_DIR=/opt/apps/lmod/lmod/libexec \
    BASH_ENV=/opt/apps/lmod/lmod/init/bash

# Copy the build context into the image and work from there.
COPY . /reinventcli/

WORKDIR /reinventcli

# Install the dependencies from reinvent.yml into the base conda environment
# and make the copied sources readable/executable for all users.
RUN conda update -n base -c defaults conda && \
    conda env update --name=base --file=reinvent.yml && \
    chmod -R "a+rx" /reinventcli
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
REINVENT 3.1
REINVENT 3.2
=================================================================================================================

Installation
Expand All @@ -12,7 +12,7 @@ Installation

4. Activate the environment:

$ conda activate reinvent.v3.0
$ conda activate reinvent.v3.2

5. Use the tool.

Expand Down
11 changes: 10 additions & 1 deletion configs/example.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
"AZGARD_EXECUTOR_SCRIPT_PATH": "/<your_path>/executor.py",
"AZGARD_ENV_PATH": "/<your_path>/miniconda3/envs/AZgard/bin/python",
"AZGARD_DEBUG": true
},
"ICOLOS": {
"ICOLOS_EXECUTOR_PATH": "/<your_path>/miniconda3/envs/icolosprod/bin/icolos",
"ICOLOS_DEBUG": true
},
"AIZYNTH": {
"CONFIG": "/projects/mai/synthesisplanning/minimal_config.yml"
}
},
"ENVIRONMENTAL_VARIABLES": {
Expand All @@ -28,5 +35,7 @@
"ACTIVITY_CLASSIFICATION": "",
"SMILES_SET_PATH": "",
"PRIOR_PATH": "",
"LIBINVENT_PRIOR_PATH": ""
"LIBINVENT_PRIOR_PATH": "",
"SMILES_SET_LINK_INVENT_PATH":"",
"LINK_INVENT_PRIOR_PATH": ""
}
39 changes: 29 additions & 10 deletions input.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,38 @@

import sys
import json
import argparse
from pathlib import Path
from running_modes.manager import Manager


if __name__ == "__main__":
DEFAULT_BASE_CONFIG_PATH = (Path(__file__).parent / 'configs/config.json').resolve()

parser = argparse.ArgumentParser(description='Run Reinvent.')
parser.add_argument(
'--base_config', type=str, default=DEFAULT_BASE_CONFIG_PATH,
help='Path to basic configuration for Reinvent environment.'
)
parser.add_argument(
'run_config', type=str,
help='Path to configuration json file for this run.'
)

with open(sys.argv[1]) as f:
json_input = f.read().replace('\r', '').replace('\n', '')

configuration = {}
def read_json_file(path):
    """
    Load a JSON document from a file.

    Carriage returns and newlines are stripped from the raw text before it is
    parsed.

    :param path: Path of the JSON file to read.
    :return: The parsed JSON content, or None when the content cannot be
        parsed (the error is then reported on stdout).
    """
    with open(path) as f:
        json_input = f.read().replace('\r', '').replace('\n', '')
    try:
        return json.loads(json_input)
    except (ValueError, KeyError, TypeError) as e:
        # f-strings interpolate with plain braces; the shell-style "${...}"
        # form printed a stray "$" in front of the path and the error.
        print(f"JSON format error in file {path}: \n {e}")
        return None


if __name__ == "__main__":
args = parser.parse_args()

base_config = read_json_file(args.base_config)
run_config = read_json_file(args.run_config)

manager = Manager(base_config, run_config)
manager.run()
4 changes: 0 additions & 4 deletions pytest.ini

This file was deleted.

8 changes: 4 additions & 4 deletions reinvent.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: reinvent.v3.0
name: reinvent.v3.2
channels:
- rdkit
- pytorch
Expand Down Expand Up @@ -211,9 +211,9 @@ dependencies:
- markdown==3.2.1
- opt-einsum==3.2.0
- protobuf==3.11.3
- reinvent-chemistry==0.0.40
- reinvent-models==0.0.12
- reinvent-scoring==0.0.57
- reinvent-chemistry==0.0.50
- reinvent-models==0.0.15rc1
- reinvent-scoring==0.0.73
- tensorboard==1.15.0
- tensorflow==1.15.2
- tensorflow-estimator==1.15.1
Expand Down
3 changes: 3 additions & 0 deletions running_modes/.directory
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[Dolphin]
Timestamp=2022,4,8,15,57,33
Version=3
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from running_modes.automated_curriculum_learning.actions.base_action import BaseAction
from running_modes.automated_curriculum_learning.actions.base_sample_action import BaseSampleAction
from running_modes.automated_curriculum_learning.actions.lib_invent_sample_model import LibInventSampleModel
from running_modes.automated_curriculum_learning.actions.link_invent_sample_model import LinkInventSampleModel
11 changes: 11 additions & 0 deletions running_modes/automated_curriculum_learning/actions/base_action.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import abc
from running_modes.automated_curriculum_learning.logging.base_logger import BaseLogger


class BaseAction(abc.ABC):
    """Abstract base for actions; its only state is an optional logger."""

    def __init__(self, logger=None):
        """
        Set up the action.

        :param logger: Logger to attach to the action; None when omitted.
        """
        self.logger: BaseLogger = logger
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import numpy as np
from running_modes.automated_curriculum_learning.actions import BaseAction


class BaseSampleAction(BaseAction):
    """Base class for sampling actions; provides a de-duplication helper."""

    def _get_indices_of_unique_smiles(self, smiles: [str]) -> np.array:
        """
        Locate the first occurrence of every distinct SMILES string.

        :param smiles: The SMILES strings to inspect.
        :return: The index of the first occurrence of each distinct string,
            sorted in ascending order.
        """
        # np.unique(..., return_index=True) yields (unique values, first indices).
        return np.sort(np.unique(smiles, return_index=True)[1])
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from typing import List

import numpy as np
from reinvent_chemistry import Conversions
from reinvent_chemistry.library_design import BondMaker, AttachmentPoints
from reinvent_models.lib_invent.models.dataset import Dataset
from reinvent_models.model_factory.generative_model_base import GenerativeModelBase
from torch.utils.data import DataLoader

from running_modes.automated_curriculum_learning.actions import BaseSampleAction
from running_modes.automated_curriculum_learning.dto.sampled_sequences_dto import SampledSequencesDTO


class LibInventSampleModel(BaseSampleAction):
    """Action that samples a generative model for a list of scaffolds."""

    def __init__(self, model: GenerativeModelBase, batch_size: int, logger=None, randomize=False, sample_uniquely=True):
        """
        Creates an instance of SampleModel.
        :params model: A model instance (better in scaffold_decorating mode).
        :params batch_size: Number of sampling rounds; every round samples the whole scaffold list once.
        :params logger: An optional logger instance.
        :params randomize: If True, the scaffolds are randomized before sampling.
        :params sample_uniquely: If True, duplicated results are dropped (first occurrence is kept).
        :return:
        """
        super().__init__(logger)
        self.model = model
        self._batch_size = batch_size
        self._bond_maker = BondMaker()
        self._attachment_points = AttachmentPoints()
        self._randomize = randomize
        self._conversions = Conversions()
        self._sample_uniquely = sample_uniquely

    def run(self, scaffold_list: List[str]) -> List[SampledSequencesDTO]:
        """
        Samples the model for the given scaffolds.
        :params scaffold_list: A list of scaffold SMILES.
        :return: A list of SampledSequencesDTO; empty when no scaffolds are given.
        """
        if self._randomize:
            scaffold_list = self._randomize_scaffolds(scaffold_list)
        clean_scaffolds = [self._attachment_points.remove_attachment_point_numbers(scaffold)
                           for scaffold in scaffold_list]
        if not clean_scaffolds:
            # Nothing to sample; this also avoids building a DataLoader with
            # batch_size=0, which torch rejects with a ValueError.
            return []

        vocabulary = self.model.get_vocabulary()
        dataset = Dataset(clean_scaffolds, vocabulary.scaffold_vocabulary, vocabulary.scaffold_tokenizer)
        # batch_size=len(dataset): all scaffolds are fed to the model as one batch.
        dataloader = DataLoader(dataset, batch_size=len(dataset), shuffle=False, collate_fn=Dataset.collate_fn)

        # Defined before the loop so the name is always bound at the return.
        sampled_sequences = []
        for scaffold_seqs, scaffold_seq_lengths in dataloader:
            for _ in range(self._batch_size):
                packed = self.model.sample(scaffold_seqs, scaffold_seq_lengths)
                sampled_sequences.extend(
                    SampledSequencesDTO(scaffold, decoration, nll) for scaffold, decoration, nll in packed
                )

        if self._sample_uniquely:
            sampled_sequences = self._sample_unique_sequences(sampled_sequences)

        return sampled_sequences

    def _sample_unique_sequences(self, sampled_sequences: List[SampledSequencesDTO]) -> List[SampledSequencesDTO]:
        """
        Drops duplicated samples, keeping the first occurrence and the original order.
        :params sampled_sequences: The sampled sequences to de-duplicate.
        :return: The samples whose concatenated input + output string is seen for the first time.
        """
        strings = ["".join([ss.input, ss.output]) for ss in sampled_sequences]
        unique_idxs = self._get_indices_of_unique_smiles(strings)
        # Index the list directly instead of wrapping the DTOs in a numpy array.
        return [sampled_sequences[idx] for idx in unique_idxs]

    def _randomize_scaffolds(self, scaffolds: List[str]):
        """
        Randomizes every scaffold with BondMaker.randomize_scaffold.
        :params scaffolds: A list of scaffold SMILES.
        :return: A list with the randomized scaffolds, in the input order.
        """
        scaffold_mols = [self._conversions.smile_to_mol(scaffold) for scaffold in scaffolds]
        randomized = [self._bond_maker.randomize_scaffold(mol) for mol in scaffold_mols]
        return randomized
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from typing import List

import numpy as np
from reinvent_chemistry import Conversions, TransformationTokens
from reinvent_chemistry.library_design import BondMaker, AttachmentPoints
from reinvent_models.link_invent.dataset.dataset import Dataset
from reinvent_models.model_factory.generative_model_base import GenerativeModelBase
from torch.utils.data import DataLoader

from running_modes.automated_curriculum_learning.actions import BaseSampleAction
from running_modes.automated_curriculum_learning.dto.sampled_sequences_dto import SampledSequencesDTO


class LinkInventSampleModel(BaseSampleAction):
    """Action that samples a generative model for a list of warhead pairs."""

    def __init__(self, model: GenerativeModelBase, batch_size: int, logger=None, randomize=False, sample_uniquely=True):
        """
        Creates an instance of SampleModel.
        :params model: A model instance.
        :params batch_size: Number of sampling rounds; every round samples the whole warhead list once.
        :params logger: An optional logger instance.
        :params randomize: If True, the warheads are randomized before sampling.
        :params sample_uniquely: If True, duplicated results are dropped (first occurrence is kept).
        :return:
        """
        super().__init__(logger)
        self.model = model
        self._batch_size = batch_size
        self._bond_maker = BondMaker()
        self._randomize = randomize
        self._sample_uniquely = sample_uniquely

        self._conversions = Conversions()
        self._attachment_points = AttachmentPoints()
        self._tokens = TransformationTokens()

    def run(self, warheads_list: List[str]) -> List[SampledSequencesDTO]:
        """
        Samples the model for the given warhead pairs.
        :params warheads_list: A list of warhead pair SMILES.
        :return: A list of SampledSequencesDTO; empty when no warheads are given.
        """
        if self._randomize:
            warheads_list = self._randomize_warheads(warheads_list)
        clean_warheads = [self._attachment_points.remove_attachment_point_numbers(warheads)
                          for warheads in warheads_list]
        if not clean_warheads:
            # Nothing to sample; this also avoids building a DataLoader with
            # batch_size=0, which torch rejects with a ValueError.
            return []

        dataset = Dataset(clean_warheads, self.model.get_vocabulary().input)
        # batch_size=len(dataset): all warhead pairs are fed to the model as one batch.
        data_loader = DataLoader(dataset, batch_size=len(dataset), shuffle=False, collate_fn=dataset.collate_fn)

        # Defined before the loop so the name is always bound at the return.
        sampled_sequences = []
        for batch in data_loader:
            for _ in range(self._batch_size):
                # NOTE(review): model.sample presumably yields SampledSequencesDTO items - confirm.
                sampled_sequences.extend(self.model.sample(*batch))

        if self._sample_uniquely:
            sampled_sequences = self._sample_unique_sequences(sampled_sequences)

        return sampled_sequences

    def _sample_unique_sequences(self, sampled_sequences: List[SampledSequencesDTO]) -> List[SampledSequencesDTO]:
        """
        Drops duplicated samples, keeping the first occurrence and the original order.
        :params sampled_sequences: The sampled sequences to de-duplicate.
        :return: The samples whose concatenated input + output string is seen for the first time.
        """
        # TODO could be part of a base sample action as it is the same for link and lib invent
        strings = ["".join([ss.input, ss.output]) for ss in sampled_sequences]
        unique_idxs = self._get_indices_of_unique_smiles(strings)
        # Index the list directly instead of wrapping the DTOs in a numpy array.
        return [sampled_sequences[idx] for idx in unique_idxs]

    def _randomize_warheads(self, warhead_pair_list: List[str]):
        """
        Replaces every warhead of every pair with a random SMILES of the same molecule.
        :params warhead_pair_list: A list of warhead pair SMILES, joined by the attachment separator token.
        :return: A list with the randomized warhead pairs, in the input order.
        """
        separator = self._tokens.ATTACHMENT_SEPARATOR_TOKEN
        randomized_warhead_pair_list = []
        for warhead_pair in warhead_pair_list:
            warhead_mol_list = [self._conversions.smile_to_mol(warhead) for warhead in warhead_pair.split(separator)]
            warhead_randomized_list = [self._conversions.mol_to_random_smiles(mol) for mol in warhead_mol_list]
            # Note do not use self._bond_maker.randomize_scaffold, as it would add unwanted brackets to the
            # attachment points (which are not part of the warhead vocabulary)
            randomized_warhead_pair_list.append(separator.join(warhead_randomized_list))
        return randomized_warhead_pair_list
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import Tuple, Any

import numpy as np
from reinvent_chemistry import Conversions
from reinvent_models.model_factory.generative_model_base import GenerativeModelBase

from running_modes.automated_curriculum_learning.actions import BaseSampleAction
from running_modes.automated_curriculum_learning.dto import SampledBatchDTO


class ReinventSampleModel(BaseSampleAction):
    """Action that draws one batch from a generative model and removes duplicated SMILES."""

    def __init__(self, model: GenerativeModelBase, batch_size: int, logger=None):
        """
        Build the sampling action.
        :params model: The model instance to sample from.
        :params batch_size: How many samples to request from the model.
        :params logger: An optional logger instance.
        :return:
        """
        super().__init__(logger)
        self.model = model
        self._batch_size = batch_size

        self._conversions = Conversions()

    def run(self) -> SampledBatchDTO:
        """
        Sample the model once and wrap the de-duplicated result.
        :return: A SampledBatchDTO built from the unique sequences, SMILES and likelihoods.
        """
        unique_samples = self._sample_unique_sequences(self.model, self._batch_size)
        return SampledBatchDTO(*unique_samples)

    def _sample_unique_sequences(self, agent: GenerativeModelBase, batch_size: int) -> Tuple[Any, Any, Any]:
        """
        Sample the agent and keep only the first occurrence of every SMILES.
        :params agent: The model to sample from.
        :params batch_size: How many samples to request.
        :return: Tuple of (sequences, SMILES as a numpy array, likelihoods), restricted to the unique entries.
        """
        sampled_seqs, sampled_smiles, likelihoods = agent.sample(batch_size)
        keep = self._get_indices_of_unique_smiles(sampled_smiles)
        return sampled_seqs[keep], np.array(sampled_smiles)[keep], likelihoods[keep]
Loading

0 comments on commit b7324d2

Please sign in to comment.