Skip to content

Commit

Permalink
v7.4 open source release (#35)
Browse files Browse the repository at this point in the history
* prepare for v7.4 release

* remove bazelversion restriction

* fix c++ tests
  • Loading branch information
xuyan-ru authored Jun 22, 2024
1 parent 863cf7c commit 6fd28e6
Show file tree
Hide file tree
Showing 784 changed files with 156,130 additions and 87,447 deletions.
19 changes: 8 additions & 11 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
# builder stage
FROM gcc:13.2 AS build
FROM python:3.11.8 AS build

RUN apt-get update && \
apt-get upgrade -y

RUN apt-get install npm -y && \
npm install -g @bazel/bazelisk && \
apt-get install libblas-dev -y && \
apt-get install liblapack-dev -y
apt-get install libblas-dev liblapack-dev libzmq3-dev -y

RUN apt-get install python3-minimal -y && \
apt-get install python3-pandas python3-scipy python3-absl python3-pybind11 python3-protobuf -y && \
rm -f /usr/bin/python && ln -s /usr/bin/python3 /usr/bin/python
RUN pip install pandas scipy absl-py pybind11 protobuf

COPY . ./LillyMol

Expand All @@ -22,16 +19,16 @@ ENV LILLYMOL_HOME=/LillyMol \
BUILD_BDB=1 \
BUILD_PYTHON=1

RUN ./update_bazel_configs.sh && ./build_third_party.sh && ./build_from_src.sh
RUN ./build_linux.sh

# final stage
FROM ubuntu:mantic AS final
FROM python:3.11.8-slim AS final

RUN apt-get update && \
apt-get upgrade -y && \
apt-get install python3-minimal -y && \
apt-get install python3-pandas python3-scipy python3-absl python3-pybind11 python3-protobuf -y && \
rm -f /usr/bin/python && ln -s /usr/bin/python3 /usr/bin/python
apt-get install libgomp1 -y

RUN pip install pandas scipy absl-py pybind11 protobuf

COPY --from=build /LillyMol /LillyMol

Expand Down
48 changes: 23 additions & 25 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,38 +17,35 @@
.PHONY: default
.PHONY: all

# Determine the operating system
UNAME := $(shell uname)

ifeq ($(UNAME),Darwin)
BUILD_SCRIPT := build_macos.zsh
else
BUILD_SCRIPT := build_linux.sh
endif

# Determine REPO_HOME
REPO_HOME := $(CURDIR)

# A default target that will probably work in most cases.
# Note it does not build BerkeleyDB dependent tools or Python bindings.
default:
bash -c 'if [[ -z "$$(type -p bazelisk)" && -z "$$(type -p bazel)" ]] ; then echo "No bazel/bazelisk, see README.md" && exit 1 ; fi'
echo "Default build does not build targets 'berkeleydb' and 'python'"
cd src && ./update_bazel_configs.sh
cd src && ./build_third_party.sh
cd src && ./build_from_src.sh
@echo "Build platform: $(UNAME)"
cd src && REPO_HOME=$(REPO_HOME) ./$(BUILD_SCRIPT)

all:
bash -c 'if [[ -z "$$(type -p bazelisk)" && -z "$$(type -p bazel)" ]] ; then echo "No bazel/bazelisk, see README.md" && exit 1 ; fi'
cd src && BUILD_BDB=1 BUILD_PYTHON=1 BUILD_VENDOR=1 ./update_bazel_configs.sh
cd src && BUILD_BDB=1 BUILD_PYTHON=1 BUILD_VENDOR=1 ./build_third_party.sh
cd src && BUILD_BDB=1 BUILD_PYTHON=1 BUILD_VENDOR=1 ./build_from_src.sh
@echo "Build platform: $(UNAME)"
cd src && REPO_HOME=$(REPO_HOME) BUILD_BDB=1 BUILD_PYTHON=1 BUILD_XGBOOST=1 BUILD_VENDOR=1 ./$(BUILD_SCRIPT)

berkeleydb:
bash -c 'if [[ -z "$$(type -p bazelisk)" && -z "$$(type -p bazel)" ]] ; then echo "No bazel/bazelisk, see README.md" && exit 1 ; fi'
cd src && BUILD_BDB=1 BUILD_PYTHON=1 BUILD_VENDOR=1 ./update_bazel_configs.sh
cd src && BUILD_BDB=1 ./build_third_party.sh
cd src && BUILD_BDB=1 ./build_from_src.sh

python:
bash -c 'if [[ -z "$$(type -p bazelisk)" && -z "$$(type -p bazel)" ]] ; then echo "No bazel/bazelisk, see README.md" && exit 1 ; fi'
cd src && BUILD_BDB=1 BUILD_PYTHON=1 BUILD_VENDOR=1 ./update_bazel_configs.sh
cd src && BUILD_PYTHON=1 ./build_third_party.sh
cd src && BUILD_PYTHON=1 ./build_from_src.sh
advance:
@echo "Build platform: $(UNAME)"
cd src && REPO_HOME=$(REPO_HOME) BUILD_BDB=1 BUILD_PYTHON=1 ./$(BUILD_SCRIPT)

vendor:
bash -c 'if [[ -z "$$(type -p bazelisk)" && -z "$$(type -p bazel)" ]] ; then echo "No bazel/bazelisk, see README.md" && exit 1 ; fi'
cd src && BUILD_BDB=1 BUILD_PYTHON=1 BUILD_VENDOR=1 ./update_bazel_configs.sh
cd src && BUILD_VENDOR=1 ./build_third_party.sh
cd src && BUILD_VENDOR=1 ./build_from_src.sh
@echo "Build platform: $(UNAME)"
cd src && REPO_HOME=$(REPO_HOME) BUILD_VENDOR=1 ./$(BUILD_SCRIPT)

build_docker:
docker build -f Dockerfile -t lillymolprivate .
Expand All @@ -61,6 +58,7 @@ test_lillymol:
start_s3:
docker-compose up -d
sleep 30
echo 'run s3 commannd with: aws s3 --endpoint "http://localhost:4566" <s3 command>'
@echo 'run s3 commannd with: aws s3 --endpoint "http://localhost:4566" <s3 command>'

stop_s3:
docker-compose down
32 changes: 15 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ If you use the module system
module load bazelisk
```
If you are NOT building the python bindings, bazel and bazelisk are equivalent.
If building the python bindings, a frozen version of bazel is needed and this
is controlled by bazelisk.

The software requires a gcc version of at least version 10. This version of LillyMol
uses some fairly recent c++ features, which require a recent compiler. The software
Expand All @@ -73,8 +71,7 @@ module load bazelisk
module load git
```

Other system components that are needed, which may or may not be
available.
Other system components that are needed:

* wget
* unzip
Expand All @@ -87,9 +84,21 @@ tested on any other version, although we have no reason to believe
it will not work with other versions. You will need to install
```
pip install pybind11 absl-py protobuf
apt install python-dev
```
Note that with the default build (below) Python bindings are not built.


Make sure that python-dev and libblas-dev are installed.

```
sudo apt install python-dev libblas-dev
```
Things seem to work seamlessly in virtualenv.

Installation within virtualenv works well.


# TLDR
If you have bazelisk and gcc installed, there is a reasonable possibility that
issuing `make` in the top level directory will work (but see note below
Expand Down Expand Up @@ -118,7 +127,8 @@ make all
```

If you look at [Makefile](Makefile) you will see that all it is doing
is sequentially invoking the three scripts discussed below, possibly with
is sequentially invoking the three scripts discussed below, with
different shell variables set.

### Configuring for bazel
Within the src directory, the file `WORKSPACE` configures the build environment
Expand Down Expand Up @@ -287,15 +297,3 @@ The distribution contains `cmake` infrastructure, that is currently
not functional. Within Lilly we have not been able to make it work,
usually as a result of conflicting protocol buffer versions on the
system. Work is ongoing to get cmake working for the public release.

## Overall Recipe to Build (inside Lilly).

```bash
module load gcc10
module load bazelisk
module load git
# ensure 'python' invokes to a suitable version, install requirements if needed
# pip install pybind11 absl-py protobuf
make python berkeleydb

# copy executables and library files out of the repo to their final destination.
18 changes: 18 additions & 0 deletions contrib/Molecular_Property_Profile/column_descriptions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,12 @@ feature_to_description {
description: "Number of Hydrogen bond donors, Bruns"
}
}
feature_to_description {
key: "w_alogp"
value {
description: "alogP"
}
}
feature_to_description {
key: "w_xlogp"
value {
Expand Down Expand Up @@ -424,3 +430,15 @@ feature_to_description {
description: "Number of rings with 7 or more atoms"
}
}
feature_to_description {
key: "w_ro5_ohnh"
value {
description: "Lipinski Donors"
}
}
feature_to_description {
key: "w_ro5_on"
value {
description: "Lipinski Acceptors"
}
}
2 changes: 1 addition & 1 deletion contrib/Molecular_Property_Profile/generate_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def generate_feature_profile(data: pd.DataFrame,
if column_number < 0:
logging.fatal("No %s in %r", feature_name, data.columns)

feature_type = data.dtypes[column_number]
feature_type = data.dtypes.iloc[column_number]

if verbose:
logging.info("Feature %s found in column %d type %r", feature_name, column_number, feature_type)
Expand Down
2 changes: 1 addition & 1 deletion contrib/Molecular_Property_Profile/generate_profile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ dir=$(dirname $0)
script="${0%%.sh}.py"
echo $script

python ${script} --feature_descriptions ${dir}/column_descriptions.txt "$@"
python3 ${script} --feature_descriptions ${dir}/column_descriptions.txt "$@"
34 changes: 34 additions & 0 deletions contrib/bin/FP/alogp.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# frozen_string_literal: true

# Alogp class for gfp_make

require_relative 'lib/fp_common'

# Class for Alogp fingerprints.
class ALOGP
  # Short human readable label for this fingerprint.
  attr_reader :description

  # Records the executable name, the description and the pattern used to
  # recognise ALOGP fingerprint tokens.
  def initialize
    @executable = 'alogp'
    @description = 'AlogP fingerprint'
    @rx = /^ALOGP/
  end

  # True when a line of +fp+ begins with ALOGP.
  def match?(fp) # rubocop:disable Naming/MethodParameterName
    @rx.match?(fp)
  end

  # Builds the command line for the fingerprint token +fp+.
  #
  # The command stem comes from FpCommon.initial_command_stem, and whatever
  # follows the five character ALOGP prefix is handed to
  # FpCommon.parse_fp_token. Only the first value it returns is used here.
  #
  # Raises RuntimeError when +fp+ does not have the ALOGP form.
  # Returns the command string.
  def expand(fp, first_in_pipeline:, extra_qualifiers:) # rubocop:disable Naming/MethodParameterName
    raise "Unrecognized ALOGP fp form '#{fp}'" unless /^ALOGP(\d+)*/.match?(fp)

    command = FpCommon.initial_command_stem(@executable,
                                            first_in_pipeline: first_in_pipeline,
                                            extra_qualifiers: extra_qualifiers)
    replicates, _atype, _fixed = FpCommon.parse_fp_token(fp[5..])

    # Fixed is the default, and non colliding does not work with iwfp, so the
    # tag and -Y options are always the same.
    command.concat(' -J NCALOGP -Y alcacid -Y RDKIT.N+ -Y quiet')
    command.concat(" -p #{replicates}") if replicates
    command
  end
end
39 changes: 39 additions & 0 deletions contrib/bin/FP/ap.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# frozen_string_literal: true

# AP class for gfp_make

require_relative 'lib/fp_common'

# Atom pair class.
class AP
  # Short human readable label for this fingerprint.
  attr_reader :description

  # Records the executable name, the description and the pattern used to
  # recognise atom pair tokens (AP, optionally preceded by one or more M).
  def initialize
    @executable = 'atom_pair_fingerprint'
    @description = 'Atom pair fingerprints'
    @rx = /^M*AP/
  end

  # True when a line of +fp+ begins with an optional run of M followed by AP.
  def match?(fp) # rubocop:disable Naming/MethodParameterName
    @rx.match?(fp)
  end

  # Builds the command line for the fingerprint token +fp+.
  #
  # The text after the M*AP prefix is handed to FpCommon.parse_fp_token, whose
  # three return values are written to $stderr. A token containing ':fixed'
  # selects the '-J fixed -J FPAP' tag, anything else selects '-J NCAP'. When
  # a path length was parsed it is appended directly to the tag and also
  # passed via -R. When an atom type was parsed it is passed via -P.
  #
  # Raises RuntimeError when +fp+ does not have the AP form.
  # Returns the command string.
  def expand(fp, first_in_pipeline:, extra_qualifiers:) # rubocop:disable Naming/MethodParameterName
    raise "Unrecognized AP fp form '#{fp}'" unless /^M*AP(\d+)*/.match?(fp)

    command = FpCommon.initial_command_stem(@executable,
                                            first_in_pipeline: first_in_pipeline,
                                            extra_qualifiers: extra_qualifiers)
    remainder = fp.gsub(/^M*AP/, '')
    path_length, atype, fixed = FpCommon.parse_fp_token(remainder)
    $stderr << "path_length #{path_length} atype #{atype} fixed #{fixed}\n"

    tag_option = fp.include?(':fixed') ? ' -J fixed -J FPAP' : ' -J NCAP'
    command.concat(tag_option)
    # No leading space: the path length becomes a suffix of the tag above.
    command.concat("#{path_length} -R #{path_length}") if path_length
    command.concat(" -P #{atype}") if atype
    command
  end
end
43 changes: 43 additions & 0 deletions contrib/bin/FP/cats.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# CATS class for gfp_make

require_relative 'lib/fp_common'

# Class for CATS fingerprints.
class CATS
  # Short human readable label for this fingerprint.
  attr_reader :description

  # Records the executable name, the description and the pattern used to
  # recognise CATS tokens (CATS, optionally followed by one or more P).
  def initialize
    @rx = Regexp.new('^CATSP*')
    # NOTE(review): 'pharmacaphore' looks like a misspelling of
    # 'pharmacophore'; left unchanged because it is runtime text.
    @description = 'CATS pharmacaphore fingerprint'
    @executable = 'jwcats.sh'
  end

  # True when a line of +fp+ begins with CATS.
  def match?(fp) # rubocop:disable Naming/MethodParameterName
    @rx.match?(fp)
  end

  # Builds the command line for the fingerprint token +fp+.
  #
  # Tokens starting with CATSP get a 'P' appended to the NCCATS tag and a
  # '-p' option; their remainder starts after five characters instead of
  # four. The remainder is handed to FpCommon.parse_fp_token and only the
  # first value it returns is used: when present it is appended to the tag
  # and also passed via -m.
  #
  # Raises RuntimeError when +fp+ does not have the CATS form.
  # Returns the command string.
  def expand(fp, first_in_pipeline:, extra_qualifiers:) # rubocop:disable Naming/MethodParameterName
    raise "Unrecognized CATS fp form '#{fp}'" unless /^CATSP*(\d+)*/.match?(fp)

    cmd = FpCommon.initial_command_stem(@executable, first_in_pipeline: first_in_pipeline,
                                                     extra_qualifiers: extra_qualifiers)

    p_variant = /^CATSP/.match?(fp)
    prefix_length = p_variant ? 5 : 4
    path_length, _atype, _fixed = FpCommon.parse_fp_token(fp[prefix_length..])

    # Unary plus yields a mutable string, so appending below keeps working
    # without warnings or FrozenError when string literals are frozen (the
    # other fingerprint classes already enable frozen_string_literal).
    tag = +'NCCATS'
    if p_variant
      tag << 'P'
      cmd << ' -p'
    end
    tag << path_length.to_s if path_length

    cmd << " -J #{tag}"
    cmd << " -m #{path_length}" if path_length

    cmd
  end
end
33 changes: 33 additions & 0 deletions contrib/bin/FP/dsc.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# frozen_string_literal: true

# iwdescr fingerprint class for gfp_make

require_relative 'lib/fp_common'

# Class for descriptor based fingerprints.
class DSC
  # Short human readable label for this fingerprint.
  attr_reader :description

  # Records the executable name, the description and the pattern used to
  # recognise DSC fingerprint tokens.
  def initialize
    @rx = Regexp.new('^DSC')
    @description = 'Descriptor based fingerprint'
    @executable = 'iwdescr'
  end

  # True when a line of +fp+ begins with DSC.
  def match?(fp) # rubocop:disable Naming/MethodParameterName
    @rx.match?(fp)
  end

  # Builds the command line for the fingerprint token +fp+.
  #
  # The command stem comes from FpCommon.initial_command_stem. When this is
  # not the first stage of the pipeline, a standalone '-f' option in the stem
  # is rewritten to '-G FILTER'. '-O dm -O complex' is always appended.
  #
  # Raises RuntimeError when +fp+ does not have the DSC form.
  # Returns the command string.
  def expand(fp, first_in_pipeline:, extra_qualifiers:) # rubocop:disable Naming/MethodParameterName
    raise "Unrecognized DSC fp form '#{fp}'" unless /^DSC(\d+)*/.match?(fp)

    cmd = FpCommon.initial_command_stem(@executable, first_in_pipeline: first_in_pipeline,
                                                     extra_qualifiers: extra_qualifiers)
    # iwdescr already has a -f option that does something different.
    # Only a whole '-f' token is rewritten; matching the bare substring would
    # also corrupt paths or options that merely contain "-f" (for example
    # "/opt/lilly-fp/bin" or "-flush").
    cmd = cmd.gsub(/(?<!\S)-f(?!\S)/, '-G FILTER') unless first_in_pipeline
    cmd << ' -O dm -O complex'
    # TODO: ianwatson Figure out donor/acceptor things...
    cmd
  end
end
end
Loading

0 comments on commit 6fd28e6

Please sign in to comment.