Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge CMake build process and Python wrapper shared library into main #14

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Builds the RawHash2 Docker image on pushes to main, on pull requests, and on
# manual dispatch, then smoke-tests the image by running the binary with -h.
name: Docker Build and Test

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:
    inputs:
      use_cache:
        description: 'Use cache for this run'
        required: true
        default: 'true'
      num_build_jobs:
        description: 'Number of build jobs'
        required: true
        default: '2'

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Skipped only when a manual run explicitly sets use_cache to something other than 'true'.
      - name: Cache build directory
        if: github.event_name != 'workflow_dispatch' || github.event.inputs.use_cache == 'true'
        uses: actions/cache@v3
        with:
          path: build
          key: ${{ runner.os }}-build-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-build-

      # The owner is lowercased because it is used in the image tag below.
      # The value is written to $GITHUB_OUTPUT; the older `::set-output`
      # workflow command is deprecated.
      - name: Set lowercase repository owner
        id: repo_owner
        env:
          REPO_OWNER: ${{ github.repository_owner }}
        run: |
          echo "owner=$(printf '%s' "$REPO_OWNER" | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT"

      - name: Build Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          load: true
          tags: ghcr.io/${{ steps.repo_owner.outputs.owner }}/build_test:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max
          build-args: |
            BUILDKIT_INLINE_CACHE=1
            RAWHASH_NUM_BUILD_JOBS=${{ github.event.inputs.num_build_jobs || '2' }}

      - name: Extract build directory from Docker image
        run: |
          container_id=$(docker create ghcr.io/${{ steps.repo_owner.outputs.owner }}/build_test:latest)
          # Copy the directory *contents* ("/."): if ./build was already restored
          # by the cache step, copying the directory itself would nest it as
          # ./build/build instead of refreshing ./build.
          docker cp "$container_id:/rawhash2/build/." ./build
          docker rm "$container_id"

      # Smoke test: the image entrypoint is the rawhash2 binary, so this prints its help.
      - name: Run Docker container
        run: docker run --rm ghcr.io/${{ steps.repo_owner.outputs.owner }}/build_test:latest -h
18 changes: 13 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
bin/
build/
run_dir/
.venv/
example_out/
__pycache__/

test/data/d1_*
test/data/d2_*
Expand All @@ -8,7 +13,6 @@ test/data/d5_*
test/data/d6_*
test/data/d7_*
test/data/d8_*
test/data/download_d8_*
test/data/*/*.fa
test/data/*/*.fasta
test/data/*/*.tar
Expand Down Expand Up @@ -73,13 +77,17 @@ test/eval/*.summary
*/.DS_Store
test/*.time
test/*.idx
src/*.o
#src/*.o

extern/pod5*

test/evaluation/rawsamble/

extern/tensorflow/
test/scripts/*test*
test/evaluation/read_mapping/*/parameters.txt
test/evaluation/read_mapping/*/results.txt
test/evaluation/read_mapping/*parameters*.txt
test/evaluation/read_mapping/*parameters/
test/evaluation/read_mapping/s_modify.py

test/evaluation/read_mapping/*_parameters/
test/evaluation/read_mapping/s_modify.py
test/evaluation/read_mapping/*.features
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Top-level build entry point: declares the root project and delegates to src/.
# NOTE(review): cmake/SetupCCacheMold.cmake calls add_link_options(), which was
# introduced in CMake 3.13 - confirm whether the minimum below should be raised.
cmake_minimum_required(VERSION 3.10)
project(RawHash2Root)

# Process src/CMakeLists.txt (not shown here).
add_subdirectory(src)
17 changes: 17 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Build image for RawHash2: configures and compiles the project with CMake on
# top of the official GCC image and exposes the resulting binary as entrypoint.
FROM gcc:latest

# Build tools; the apt package lists are removed in the same layer.
RUN apt-get update && apt-get install -y \
    cmake make mold ccache \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /rawhash2
COPY . /rawhash2

# Number of parallel make jobs (pass with --build-arg). When it is not
# supplied, fall back to the number of available cores: an empty value would
# turn the command into a bare `make -j`, which places no limit on parallel jobs.
ARG RAWHASH_NUM_BUILD_JOBS
RUN mkdir -p build && cd build \
    && cmake .. \
    && make -j "${RAWHASH_NUM_BUILD_JOBS:-$(nproc)}"

# Absolute path, so the entrypoint does not depend on the working directory
# the container is started with.
ENTRYPOINT ["/rawhash2/build/bin/rawhash2"]

LABEL Name=rawhash2 Version=0.0.1
21 changes: 0 additions & 21 deletions Makefile

This file was deleted.

64 changes: 47 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,46 +40,76 @@ RawHash performs real-time mapping of nanopore raw signals. When the prefix of r

# Installation

* Clone the code from its GitHub repository (`--recursive` must be used):
* Clone the code from its GitHub repository and recursively initialize submodules:

```bash
git clone --recursive https://github.com/CMU-SAFARI/RawHash.git rawhash2
git clone https://github.com/CMU-SAFARI/RawHash.git rawhash2
cd rawhash2 && git submodule update --init --recursive
```

* Compile (make sure you have a C++ compiler, CMake, and GNU make):

```bash
cd rawhash2 && make
# If this is not a fresh clone, make sure the submodules contain nothing built by the previous Makefile-based
# setup, i.e., delete the extern directory and then initialize the submodules as above.
(mkdir -p build && cd build && cmake .. && make -j)
build/bin/rawhash2 -h
```

If the compilation is successful, the path to the binary will be `bin/rawhash2`.
Troubleshooting:
- `makefile error 2`: rerun `make -j`, then the actual error is shown
- updating submodules: the current cmake setup may not correctly handle this, so the easiest solution is to delete the build directory

If the compilation is successful, the default path to the binary will be `build/bin/rawhash2`.

* Installation

You can install RawHash2 into the CMake-provided platform-specific destination (e.g. `/usr/local/` on UNIX) with `make install`:

```bash
make install
rawhash2 -h
```

Installation directory can be overridden by providing `-DCMAKE_INSTALL_PREFIX=...` argument to the `cmake ..` command, e.g.

```bash
cmake -DCMAKE_INSTALL_PREFIX=./install ..
make -j
make install
./install/bin/rawhash2 -h
```

Note that `CMAKE_INSTALL_PREFIX` is a cached variable in CMake.

## Compiling with HDF5, SLOW5, and POD5

We are aware that some of the pre-compiled libraries (e.g., POD5) may not work in your system and you may need to compile these libraries from scratch. Additionally, it may be possible that you may not want to compile any of the HDF5, SLOW5, or POD5 libraries if you are not going to use them. RawHash2 provides a flexible Makefile to enable custom compilation of these libraries.
We are aware that some of the pre-compiled libraries (e.g., POD5) may not work on your system, in which case you may need to compile them from scratch. Additionally, you may not want to compile any of the HDF5, SLOW5, or POD5 libraries if you are not going to use them. RawHash2 provides several CMake options to enable custom compilation of these libraries.

* It is possible to provide your own include and lib directories for *any* of the HDF5, SLOW5, and POD5 libraries, if you do not want to use the source code or the pre-compiled binaries that come with RawHash2. To use your own include and lib directories you should pass them to `make` when compiling as follows:
It is possible to provide your own include and lib directories for *any* of the HDF5, SLOW5, and POD5 libraries, if you do not want to use the source code or the pre-compiled binaries that come with RawHash2. To use your own include and lib directories you should pass them to `cmake` when compiling as follows:

```bash
#Provide the path to all of the HDF5/SLOW5/POD5 include and lib directories during compilation
make HDF5_INCLUDE_DIR=/path/to/hdf5/include HDF5_LIB_DIR=/path/to/hdf5/lib \
SLOW5_INCLUDE_DIR=/path/to/slow5/include SLOW5_LIB_DIR=/path/to/slow5/lib \
POD5_INCLUDE_DIR=/path/to/pod5/include POD5_LIB_DIR=/path/to/pod5/lib
# Provide the path to all of the HDF5/SLOW5/POD5 include and lib directories during compilation
cmake -DHDF5_DIR=/path/to/hdf5 -DSLOW5_DIR=/path/to/slow5 -DPOD5_DIR=/path/to/pod5 ..

#Provide the path to only POD5 include and lib directories during compilation
make POD5_INCLUDE_DIR=/path/to/pod5/include POD5_LIB_DIR=/path/to/pod5/lib
# Provide the path to only POD5 include and lib directories during compilation
cmake -DPOD5_DIR=/path/to/pod5 ..
```

* It is possible to disable compiling *any* of the HDF5, SLOW5, and POD5 libraries. To disable them, you can use the following variables
Note that the provided path should generally contain _both_ `include/` and `lib/` folders with the corresponding project's include and library files.

It is possible to disable compiling *any* of the HDF5, SLOW5, and POD5 libraries. To disable them, you can use the following variables:

```bash
#Disables compiling HDF5
make NOHDF5=1
# Disables compiling HDF5
cmake -DNOHDF5=1 ..

#Disables compiling SLOW5 and POD5
make NOSLOW5=1 NOPOD5=1
# Disables compiling SLOW5 and POD5
cmake -DNOSLOW5=1 -DNOPOD5=1 ..
```

The variables and paths will be stored in CMake cache, meaning that you would need to run `cmake` again with explicitly provided new values to change them.

# Usage

## Getting help
Expand Down
57 changes: 57 additions & 0 deletions cmake/SetupCCacheMold.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# enable_ccache()
#
# Looks for a `ccache` executable and, when one is found, records it as the
# C and C++ compiler launcher in the CMake cache. Without FORCE, launcher
# values already present in the cache are left untouched. When ccache is not
# available, only a status message is printed.
function(enable_ccache)
  # Alternative without CMake support: export PATH="/usr/lib/ccache:$PATH"
  find_program(CCACHE_EXE ccache)

  if(NOT CCACHE_EXE)
    message(STATUS "ccache not found, not using it")
    return()
  endif()

  message(STATUS "found ccache at ${CCACHE_EXE}, using it")
  set(CMAKE_C_COMPILER_LAUNCHER
      "${CCACHE_EXE}"
      CACHE STRING "C compiler launcher")
  set(CMAKE_CXX_COMPILER_LAUNCHER
      "${CCACHE_EXE}"
      CACHE STRING "C++ compiler launcher")
endfunction()

# mold is a much faster linker than ld, also see for mold: https://github.com/heavyai/heavydb/blob/master/CMakeLists.txt
# or try: https://gitlab.kitware.com/cmake/cmake/-/merge_requests/8861, can now use CMAKE_LINKER_TYPE
# set_alternate_linker(<linker>)
#
# Selects an alternative linker (e.g. mold) by adding the matching driver
# option with add_link_options(), which affects targets created after this call
# in the current directory and below. Aborts configuration with FATAL_ERROR
# when the linker cannot be found.
#
# This is a macro, so PATH_TO_LIBEXEC_MOLD is set in the caller's scope;
# LINKER_EXECUTABLE is a cache entry created by find_program().
macro(set_alternate_linker linker)
  # NAMES is needed to try several candidate names: in the short form
  # find_program(<VAR> name [path...]) every argument after the first one is
  # interpreted as a search path, so a bare second name would never be tried.
  find_program(LINKER_EXECUTABLE NAMES ld.${linker} ${linker})
  if(LINKER_EXECUTABLE)
    message(STATUS "Found linker ${linker}: ${LINKER_EXECUTABLE}")
    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 12.0.0)
      add_link_options("-ld-path=${linker}")
    elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 12.1.0 AND "${linker}" STREQUAL "mold")
      # LINKER_EXECUTABLE will be a full path to ld.mold, so we replace the end of the path, resulting in the relative
      # libexec/mold dir, and tell GCC to look there first for an override version of executables, in this case, ld
      # (the path is quoted so that it stays a single argument even if it contains spaces or semicolons)
      string(REPLACE "bin/ld.mold" "libexec/mold" PATH_TO_LIBEXEC_MOLD "${LINKER_EXECUTABLE}")
      add_link_options("-B${PATH_TO_LIBEXEC_MOLD}")
    else()
      add_link_options("-fuse-ld=${linker}")
    endif()
  else()
    message(FATAL_ERROR "Could not find linker ${linker}")
  endif()
endmacro()

# not working
# function(setup_ccache_mold)
# if(USE_CCACHE)
# find_program(CCACHE_PROGRAM ccache)
# if(CCACHE_PROGRAM)
# message(STATUS "ccache found: ${CCACHE_PROGRAM}")
# set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
# set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_PROGRAM}")
# else()
# message(WARNING "ccache not found, using default compiler")
# endif()
# endif()

# if(USE_MOLD)
# find_program(MOLD_PROGRAM mold)
# # todo: does not seem to work, for a working configuration, see https://github.com/ratschlab/readuntil_fake/blob/refactor/cmake/utils.cmake
# if(MOLD_PROGRAM)
# message(STATUS "mold found: ${MOLD_PROGRAM}")
# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${MOLD_PROGRAM}")
# else()
# message(WARNING "mold not found, using default linker")
# endif()
# endif()
# endfunction()
41 changes: 41 additions & 0 deletions cmake/SetupHDF5.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
include(${CMAKE_CURRENT_LIST_DIR}/Utils.cmake)

# add_hdf5_to_target(<target>)
#
# Wires HDF5 support into <target>:
#   * NOHDF5 set:   only the private compile definition NHDF5RH=1 is added.
#   * otherwise:    the target is passed to add_imported_library() together
#                   with the name `hdf5` (helper from Utils.cmake, not shown
#                   here - presumably it links the imported library); when
#                   HDF5_COMPILE is set, the target is additionally ordered
#                   after the `hdf5_build` target.
function(add_hdf5_to_target TARGET_NAME)
  if(NOT NOHDF5)
    if(HDF5_COMPILE)
      add_dependencies(${TARGET_NAME} hdf5_build)
    endif()
    add_imported_library(${TARGET_NAME} hdf5)
    return()
  endif()

  target_compile_definitions(${TARGET_NAME} PRIVATE NHDF5RH=1)
endfunction()

# setup_hdf5()
#
# Prepares the HDF5 dependency unless NOHDF5 is set.
#   * HDF5_COMPILE set: HDF5 is built from extern/hdf5 through the
#     `hdf5_build` external project and installed into HDF5_DIR. If HDF5_DIR is
#     empty it is first set to ${EXTERNAL_PROJECTS_BUILD_DIR}/hdf5 via
#     override_cached() (helper from Utils.cmake, not shown here).
#   * HDF5_COMPILE unset: HDF5_DIR must be provided by the user, otherwise
#     configuration stops with FATAL_ERROR.
# In both cases HDF5_DIR is finally handed to define_imported_library()
# (helper from Utils.cmake, not shown here).
function(setup_hdf5)
  if(NOHDF5)
    return()
  endif()

  # Print the directories in effect to ease debugging of the configuration.
  message(STATUS "EXTERNAL_PROJECTS_BUILD_DIR: ${EXTERNAL_PROJECTS_BUILD_DIR}")
  message(STATUS "HDF5_DIR: ${HDF5_DIR}")
  set(HDF5_SOURCE_DIR "${CMAKE_SOURCE_DIR}/extern/hdf5")

  if(HDF5_COMPILE)
    # Included here so that the function works regardless of whether a caller
    # has already included the module.
    include(ExternalProject)

    if(NOT HDF5_DIR)
      override_cached(HDF5_DIR ${EXTERNAL_PROJECTS_BUILD_DIR}/hdf5)
    endif()
    set(HDF5_BUILD_DIR "${HDF5_DIR}/build")

    ExternalProject_Add(
      hdf5_build
      BUILD_ALWAYS 1 # Rebuild if local checkout is updated
      SOURCE_DIR "${HDF5_SOURCE_DIR}"
      BINARY_DIR "${HDF5_BUILD_DIR}"
      # Configure with the real install location. The prefix used to point at
      # the build directory and was overridden only at `make install` time, so
      # the prefix recorded at configure time disagreed with the directory the
      # files were installed into.
      CONFIGURE_COMMAND "${HDF5_SOURCE_DIR}/configure" --enable-threadsafe --disable-hl "--prefix=${HDF5_DIR}"
      # INSTALL_DIR and DCMAKE_INSTALL_PREFIX are ignored by hdf5; the install
      # location comes from --prefix above.
      INSTALL_COMMAND make install
    )
  else()
    if(NOT HDF5_DIR)
      message(FATAL_ERROR "HDF5_COMPILE is OFF, but no dir provided")
    endif()
  endif()

  define_imported_library(hdf5 ${HDF5_DIR} SHARED)
endfunction()
Loading
Loading