diff --git a/.github/workflows/containers.yaml b/.github/workflows/containers.yaml new file mode 100644 index 00000000..fce92588 --- /dev/null +++ b/.github/workflows/containers.yaml @@ -0,0 +1,120 @@ +name: Containers + +on: + push: + tags: + - "**" + branches: + - "**" + paths: + - "build/containers/**" + - "core" + - "combination" + +jobs: + checks: + name: Run quality checks + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install requirements + run: | + sc_version="stable" # or "v0.4.7", or "latest" + hl_version="v2.12.0" + case $( uname -m ) in + arm64 | aarch64 ) + sc_platform=aarch64 + hl_platform=arm64 + ;; + *) + sc_platform=$( uname -m ) + hl_platform="${sc_platform}" + ;; + esac + + echo "Installing shellcheck ..." + wget -qO- "https://github.com/koalaman/shellcheck/releases/download/${sc_version?}/shellcheck-${sc_version?}.linux.${sc_platform}.tar.xz" \ + | tar -xJv >/dev/null + sudo cp "shellcheck-${sc_version}/shellcheck" /usr/bin/ + shellcheck --version + + echo "Installing hadolint ..." + wget -q https://github.com/hadolint/hadolint/releases/download/${hl_version}/hadolint-Linux-${hl_platform} + sudo cp "hadolint-Linux-${hl_platform}" /usr/bin/hadolint + sudo chmod 755 /usr/bin/hadolint + hadolint --version + + - name: Run quality checks + run: | + make -C build/containers checks + + list: + name: List containers + runs-on: ubuntu-latest + env: + IS_TAG: ${{ startsWith(github.ref, 'refs/tags/') }} + outputs: + containers: ${{ steps.list-containers.outputs.containers }} + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + fetch-depth: 2 + + - id: list-containers + name: Determine containers + run: | + if [ "${IS_TAG}" == "false" ]; then + echo "Listing only the containers with changes" + containers=$( build/containers/build.sh --changed-containers-as-json) + elif [ "${IS_TAG}" == "true" ]; then + echo "Listing all the containers" + containers=$( build/containers/build.sh --all-containers-as-json) + fi + echo "containers = ${containers}" + echo "containers=${containers}" >> "$GITHUB_OUTPUT" + + build: + name: Build + runs-on: ubuntu-latest + needs: + - checks + - list + strategy: + max-parallel: 8 + fail-fast: false + matrix: + container: ${{ fromJSON(needs.list.outputs.containers) }} + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Build Docker image + run: build/containers/build.sh --containers ${{ matrix.container }} + + push: + name: Push + if: startsWith(github.ref, 'refs/tags/') + runs-on: ubuntu-latest + env: + DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }} + needs: + - build + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Login to DockerHub + if: ${{ env.DOCKER_USERNAME != '' }} + run: echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin + + - name: Push Docker images + if: ${{ env.DOCKER_USERNAME != '' }} + run: | + build/containers/build.sh --dry-run --skip-build --push --all-containers diff --git a/build/Makefile b/build/Makefile index ca092b0e..b191e1ba 100644 --- a/build/Makefile +++ b/build/Makefile @@ -3,12 +3,10 @@ SHELL := /bin/bash .PHONY: all clean # trick to make all the first target # and set it to the end once all variables are defined -all: containers datasets +all: datasets build_dir = .. 
 INTOGEN_DATASETS ?= ${build_dir}/datasets
-INTOGEN_CONTAINERS ?= ${build_dir}/containers
-
 # hg version
 genome = 38
@@ -17,17 +15,10 @@ cadd ?= 1.6
 # number of cores to use in steps that allow them
 cores ?= 1
-
 src_datasets = datasets
-src_containers = containers
-
-container_builder = ${src_containers}/build.sh
-
 # accumulate all targets
 DATASETS =
-CONTAINERS_SUDO =
-CONTAINERS_USER =
 # useful variables
 ensembl_db = "homo_sapiens_core_${ensembl}_${genome}"
@@ -39,9 +30,6 @@ grch = GRCh${genome}
 $(INTOGEN_DATASETS):
	mkdir -p $@
-$(INTOGEN_CONTAINERS):
-	mkdir -p $@
-
 # Create checkpoints files so that if versions are changed
 # proper files are rebuild
 GENOME = $(INTOGEN_DATASETS)/.hg${genome}
@@ -55,23 +43,16 @@ $(ENSEMBL): | $(INTOGEN_DATASETS)
 $(CADD): | $(INTOGEN_DATASETS)
	touch $@
-
-
 # Use second expansion for mixed dependencies
 .SECONDEXPANSION:
 include ${src_datasets}/*.mk
 include ${src_datasets}/*/*.mk
-include ${src_containers}/*/*.mk
 #########################
-.PHONY: datasets containers sudo
+.PHONY: datasets
 datasets: bgdata $(DATASETS) | $(INTOGEN_DATASETS)
-containers: $(CONTAINERS_USER) $(CONTAINERS_SUDO) | $(INTOGEN_CONTAINERS)
-
-sudo: $(CONTAINERS_SUDO) | $(INTOGEN_CONTAINERS)
-
 clean:
-	rm -rf $(INTOGEN_DATASETS) $(INTOGEN_CONTAINERS)
+	rm -rf $(INTOGEN_DATASETS)
diff --git a/build/containers/Makefile b/build/containers/Makefile
new file mode 100644
index 00000000..99ba101b
--- /dev/null
+++ b/build/containers/Makefile
@@ -0,0 +1,64 @@
+CURRENT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
+
+BUILD := $(CURRENT_DIR)/build.sh
+BUILD_ARGS ?=
+
+SHELLCHECK_BIN ?= shellcheck
+SHELLCHECK_ARGS ?=
+
+HADOLINT_IGNORES := DL3059 # Multiple consecutive `RUN` instructions
+HADOLINT_IGNORES += DL3003 # Use WORKDIR to switch to a directory
+HADOLINT_IGNORES += DL3042 # Avoid use of cache directory with pip
+
+# HADOLINT_BIN := docker run --rm -i hadolint/hadolint hadolint
+HADOLINT_BIN ?= hadolint
+HADOLINT_ARGS ?= $(foreach ignore,$(HADOLINT_IGNORES),--ignore $(ignore))
+
+# These ANSI codes are needed to print with colours
+GREEN := \033[0;32m
+WHITE := \033[0;37m
+BOLD := \033[1m
+BOLD_GREEN := $(GREEN)$(BOLD)
+BOLD_WHITE := $(WHITE)$(BOLD)
+RESET := \033[0m
+
+.DEFAULT_GOAL = checks
+
+.PHONY: checks
+checks: shellcheck hadolint
+
+.PHONY: shellcheck
+shellcheck:
+	@echo "$(BOLD_GREEN)Running checks for shell scripts ...$(RESET)"
+	set -e; \
+	find -L $(CURRENT_DIR) -name '*.sh' | sed 's|$(CURRENT_DIR)/||' \
+	| while read -r script; do \
+		echo "$(BOLD_WHITE)=> $${script}$(RESET)"; \
+		$(SHELLCHECK_BIN) $(SHELLCHECK_ARGS) $${script}; \
+	done
+
+.PHONY: hadolint
+hadolint:
+	@echo "$(BOLD_GREEN)Running checks for Dockerfiles ...$(RESET)"
+	set -e; \
+	find -L $(CURRENT_DIR) -name 'Dockerfile' | sed 's|$(CURRENT_DIR)/||' \
+	| while read -r dockerfile; do \
+		echo "$(BOLD_WHITE)=> $${dockerfile}$(RESET)"; \
+		$(HADOLINT_BIN) $(HADOLINT_ARGS) - < $${dockerfile}; \
+	done
+
+.PHONY: build-all
+build-all:
+	$(BUILD) --all-containers $(BUILD_ARGS)
+
+.PHONY: build-changes
+build-changes:
+	$(BUILD) $(BUILD_ARGS)
+
+.PHONY: push-all
+push-all:
+	$(BUILD) --push --all-containers $(BUILD_ARGS)
+
+.PHONY: push-changes
+push-changes:
+	$(BUILD) --push $(BUILD_ARGS)
diff --git a/build/containers/README.md b/build/containers/README.md
new file mode 100644
index 00000000..ec057779
--- /dev/null
+++ b/build/containers/README.md
@@ -0,0 +1,61 @@
+# Containers
+
+This folder contains all the containers needed specifically by the intogen pipeline.
+
+Every container has its own folder here, or a link to another folder in the repository. Each of these folders must include a `Dockerfile` with the specs to build the Docker image.
+
+## CI/CD
+
+The GitHub workflow `containers.yaml` runs the following jobs:
+- Run quality checks to make sure that we follow the best practices
+- Determine which containers need to be built and/or pushed:
+  - When the push corresponds to a tag, all the containers will be built and pushed.
+  - When it is a regular push to a branch, only the containers that were modified will be built, to make sure they are not broken by the changes.
+
+You can run the workflow locally with the following command:
+
+```shell
+act -W '.github/workflows/containers.yaml'
+```
+
+Note that you will need to install the tool [act](https://github.com/nektos/act).
+
+## Local Development
+
+You can run the following commands (note that you can skip the `-C build/containers` argument if you run the commands from the `build/containers` folder):
+
+### Running quality checks
+
+We run two types of checks:
+- [shellcheck](https://github.com/koalaman/shellcheck) to make sure that bash scripts follow the best practices
+- [hadolint](https://github.com/hadolint/hadolint) to make sure that Dockerfiles follow the best practices
+
+You will need to install them on your computer before running the following commands:
+
+- To run both checks:
+
+  ```shell
+  make -C build/containers checks
+  ```
+
+- Or individually with:
+
+  ```shell
+  make -C build/containers shellcheck
+  ```
+
+  ```shell
+  make -C build/containers hadolint
+  ```
+
+### Building all the containers
+
+```shell
+make -C build/containers build-all
+```
+
+### Building only the containers that were modified
+
+```shell
+make -C build/containers build-changes
+```
diff --git a/build/containers/build.sh b/build/containers/build.sh
index 3f13d97e..4985818c 100755
--- a/build/containers/build.sh
+++ b/build/containers/build.sh
@@ -1,19 +1,196 @@
-#!/usr/bin/env bash
+#!/bin/bash
+
+# Automatically exit on any failure
 set -e
 
-FOLDER=$1
-IMAGE=$2
+# These ANSI codes are needed to print with colours
+GREEN='\033[0;32m'
+YELLOW='\033[0;33m'
+CYAN='\033[0;36m'
+GREY='\033[38;5;241m'
+RED='\033[0;31m'
+# WHITE='\033[0;37m'
+BOLD='\033[1m'
+RESET='\033[0m' # Reset to default color
+
+# ---------------------------------------------------------------------------------------
+# Global variables ----------------------------------------------------------------------
+# ---------------------------------------------------------------------------------------
+
+# Set CONTAINERS_DIR to the path where this script is located
+CONTAINERS_DIR=$(readlink -f "${BASH_SOURCE[0]}" | xargs dirname)
+
+# List of containers (by default, all folders with git changes)
+CONTAINERS=""
+
+# Whether to actually run the docker commands or only print them
+DRY_RUN="no"
+
+# Whether to skip the build or not
+SKIP_BUILD="no"
+
+# Whether to push the containers to Docker Hub or not
+PUSH="no"
+
+# Which docker binary to use
+DOCKER_BIN="docker"
+
+# Enable BuildKit and plain progress output for the Docker builds
+export DOCKER_BUILDKIT=1
+export BUILDKIT_PROGRESS=plain
+
+# ---------------------------------------------------------------------------------------
+# Function to determine which containers have changes -----------------------------------
+# ---------------------------------------------------------------------------------------
+
+changed_containers() {
+    ( git diff --cached --name-only && git diff --name-only HEAD~1 HEAD ) |
+        xargs dirname |
+        sed 's|^build/containers/||' |
+        sed -E 's#^(combination|core)(/.*)?$#intogen-\1#' |
+        sort -u |
+        while read -r path; do
+            if [ -f "${CONTAINERS_DIR}/$path/Dockerfile" ]; then
+                echo "$path"
+            fi
+        done
+}
+
+# ---------------------------------------------------------------------------------------
+# Function to determine all the containers available ------------------------------------
+# ---------------------------------------------------------------------------------------
+
+all_containers() {
+    find -L "${CONTAINERS_DIR}" -name 'Dockerfile' -print0 |
+        xargs -0 dirname | xargs -n1 basename | sort -u
+}
+
+# ---------------------------------------------------------------------------------------
+# Function to format a list of containers for display -----------------------------------
+# ---------------------------------------------------------------------------------------
+
+format_display() {
+    echo "$1" | tr '\n' ',' | sed -E 's/,$//'
+}
+
+# ---------------------------------------------------------------------------------------
+# Function to format a list of containers into JSON --------------------------------------
+# ---------------------------------------------------------------------------------------
+
+format_json() {
+    echo "$1" | jq --raw-input . | jq --slurp --compact-output .
+}
+
+# ---------------------------------------------------------------------------------------
+# Function to build the Docker images ----------------------------------------------------
+# ---------------------------------------------------------------------------------------
+
+# This will build each container sequentially.
+# It assumes that every container folder contains a Dockerfile.
+# Each Docker image is named after its folder and tagged with the current git tag.
+# Some folders are just symlinks to upper-level folders.
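+#
+# Example invocations (the container names below are only illustrative):
+#
+#   build/containers/build.sh --containers intogen-core,intogen-combination   # build a specific set
+#   build/containers/build.sh --all-containers --push                         # build and push everything
+#   build/containers/build.sh --dry-run --all-containers                      # only print the docker commands
+#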
+build() { + container=$1 + tag=$2 + + image="${container}:${tag}" + echo -e "${GREEN}${BOLD}Building Docker image for ${YELLOW}${image}${GREEN} ...${RESET}" + command="${DOCKER_BIN} build -t ${image} ${CONTAINERS_DIR}/${container}" + echo -e "${GREY}> ${command}${RESET}" + if [ "${DRY_RUN}" != "yes" ]; then + eval "${command}" + fi +} + +# --------------------------------------------------------------------------------------- +# Function to push the containers to the DockerHub -------------------------------------- +# --------------------------------------------------------------------------------------- + +push() { + container=$1 + tag=$2 + + image="${container}:${tag}" + echo -e "${GREEN}${BOLD}Pushing Docker image for ${YELLOW}${image}${GREEN} ...${RESET}" + command="${DOCKER_BIN} push ${image}" + echo -e "${GREY}> ${command}${RESET}" + if [ "${DRY_RUN}" != "yes" ]; then + eval "${command}" + fi +} + +# --------------------------------------------------------------------------------------- +# Parse command line arguments ---------------------------------------------------------- +# --------------------------------------------------------------------------------------- + +while [[ $# -gt 0 ]]; do + case $1 in + --changed-containers-as-json) + format_json "$(changed_containers)" + exit 0 + ;; + --all-containers-as-json) + format_json "$(all_containers)" + exit 0 + ;; + --containers) + CONTAINERS="${2//,/$'\n'}" + shift + shift + ;; + --all-containers) + CONTAINERS=$(all_containers) + shift + ;; + -n | --dry-run) + DRY_RUN="yes" + shift + ;; + --skip-build) + SKIP_BUILD="yes" + shift + ;; + --push) + PUSH="yes" + shift + ;; + --podman) + DOCKER_BIN="podman" + shift + ;; + *) + echo -e "${RED}${BOLD}Unknown option: ${YELLOW}$1${RESET}" >&2 + exit 1 + ;; + esac +done + +# --------------------------------------------------------------------------------------- +# --------------------------------------------------------------------------------------- + +if [ -z "${CONTAINERS}" ]; then + CONTAINERS=$(changed_containers) +fi + +CONTAINERS_FMT=$(format_display "$CONTAINERS") + +# Set TAG to either the current git tag name or else the git commit short SHA +TAG=$(git describe --tags --exact-match 2>/dev/null || git rev-parse --short HEAD) -tmpdir=`mktemp -d` +echo -e "${CYAN}${BOLD}Containers: ${YELLOW}${CONTAINERS_FMT}${CYAN}${RESET}" -## if you have root permission you can uncomment the following: +if [ "${SKIP_BUILD}" == "no" ]; then + for container in ${CONTAINERS}; do + build "${container}" "${TAG}" + done -# (cd ${FOLDER} && sudo singularity build ${tmpdir}/$(basename ${IMAGE}) Singularity) -# mv ${tmpdir}/$(basename ${IMAGE}) ${IMAGE} -# sudo chown ${USER}: ${IMAGE} + echo -e "${GREEN}${BOLD}All images built successfully for tag ${TAG}: ${YELLOW}${CONTAINERS_FMT}${CYAN}${RESET}" +fi -## if you DO NOT have root permission then you could use the --fakeroot flag as long as your user is specified in the /etc/subuid and /etc/subgid files. See https://docs.sylabs.io/guides/3.6/admin-guide/user_namespace.html#user-namespaces-fakeroot for more info. 
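+# Push the images only when explicitly requested with --push
+# (the containers workflow enables it for tag builds)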
+if [ "${PUSH}" == "yes" ]; then + for container in ${CONTAINERS}; do + push "${container}" "${TAG}" + done -(cd ${FOLDER} && singularity build --fakeroot ${tmpdir}/$(basename ${IMAGE}) Singularity) -mv ${tmpdir}/$(basename ${IMAGE}) ${IMAGE} -chown ${USER}: ${IMAGE} + echo -e "${GREEN}${BOLD}All images pushed successfully for tag ${TAG}: ${YELLOW}${CONTAINERS_FMT}${CYAN}${RESET}" +fi diff --git a/build/containers/cbase/Singularity b/build/containers/cbase/Singularity deleted file mode 100644 index 303b9a90..00000000 --- a/build/containers/cbase/Singularity +++ /dev/null @@ -1,19 +0,0 @@ -Bootstrap: docker -From: debian:buster-slim - -%environment - export LC_ALL=C.UTF-8 - -%runscript - exec "/usr/bin/python" "/cbase/cbase.py" "$@" - -%setup - mkdir ${SINGULARITY_ROOTFS}/cbase - cp cbase.py ${SINGULARITY_ROOTFS}/cbase/cbase.py - cp -r Auxiliary/ ${SINGULARITY_ROOTFS}/cbase/Auxiliary/ - chmod -R a+xr ${SINGULARITY_ROOTFS}/cbase/ - -%post - apt-get update - apt-get install -y python-scipy python-statsmodels python-mpmath - rm -rf /var/lib/apt/lists/* diff --git a/build/containers/cbase/cbase.mk b/build/containers/cbase/cbase.mk deleted file mode 100644 index 355364fa..00000000 --- a/build/containers/cbase/cbase.mk +++ /dev/null @@ -1,14 +0,0 @@ - -CBASE_CONTAINER = $(INTOGEN_CONTAINERS)/cbase.simg - -cbase_container_srcdir = ${src_containers}/cbase - -cbase_container_src = ${cbase_container_srcdir}/cbase.py \ - ${cbase_container_srcdir}/Singularity - -$(CBASE_CONTAINER): $(cbase_container_src) | $(INTOGEN_CONTAINERS) - @echo Building CBaSE container - ${container_builder} ${cbase_container_srcdir} $@ - - -CONTAINERS_SUDO += $(CBASE_CONTAINER) \ No newline at end of file diff --git a/build/containers/combination/Singularity b/build/containers/combination/Singularity deleted file mode 100644 index de1150a5..00000000 --- a/build/containers/combination/Singularity +++ /dev/null @@ -1,16 +0,0 @@ -Bootstrap: docker -From: python:3.9 - -%environment - -%runscript - -%setup - cp -r ../../../combination ${SINGULARITY_ROOTFS}/combination/ - -%post - - pip install /combination - - # Clean unused things - rm -rf /var/lib/apt/lists/* diff --git a/build/containers/combination/combination.mk b/build/containers/combination/combination.mk deleted file mode 100644 index d42b9a7c..00000000 --- a/build/containers/combination/combination.mk +++ /dev/null @@ -1,17 +0,0 @@ - -COMBINATION_CONTAINER = $(INTOGEN_CONTAINERS)/intogen-combination.simg - -combination_container_srcdir = ${src_containers}/combination - -combination_code_folder = ${src_containers}/../../combination - -combination_container_src = ${combination_container_srcdir}/Singularity \ - ${combination_code_folder}/setup.py \ - $(wildcard ${combination_code_folder}/intogen_combination/*) \ - $(wildcard ${combination_code_folder}/*) - -$(COMBINATION_CONTAINER): ${combination_container_src} | $(INTOGEN_CONTAINERS) - @echo Building combination container - ${container_builder} ${combination_container_srcdir} $@ - -CONTAINERS_SUDO += $(COMBINATION_CONTAINER) \ No newline at end of file diff --git a/build/containers/core/Singularity b/build/containers/core/Singularity deleted file mode 100644 index d1199dde..00000000 --- a/build/containers/core/Singularity +++ /dev/null @@ -1,23 +0,0 @@ -Bootstrap: docker -From: python:3 - -%environment - export LC_ALL=C.UTF-8 - -%runscript - -%setup - cp -r ../../../core ${SINGULARITY_ROOTFS}/core/ - cp get_field.sh ${SINGULARITY_ROOTFS}/usr/local/bin/get_field.sh - chmod a+rx ${SINGULARITY_ROOTFS}/usr/local/bin/get_field.sh 
- chmod -R a+rx ${SINGULARITY_ROOTFS}/core/ - -%post - # Install openVariant - pip install open-variant - - # Install intogen-core package - pip install /core - - # Clean unused things - rm -rf /var/lib/apt/lists/* diff --git a/build/containers/core/core.mk b/build/containers/core/core.mk deleted file mode 100644 index 1454db37..00000000 --- a/build/containers/core/core.mk +++ /dev/null @@ -1,19 +0,0 @@ - -CORE_CONTAINER = $(INTOGEN_CONTAINERS)/intogen-core.simg - -core_container_srcdir = ${src_containers}/core - -core_code_folder = ${src_containers}/../../core - -core_container_src = ${core_container_srcdir}/Singularity \ - ${core_container_srcdir}/get_field.sh \ - ${core_code_folder}/setup.py \ - $(wildcard ${core_code_folder}/intogen_core/*.py) \ - $(wildcard ${core_code_folder}/intogen_core/*/*.py) \ - $(wildcard ${core_code_folder}/intogen_core/*/*/*.py) - -$(CORE_CONTAINER): ${core_container_src} | $(INTOGEN_CONTAINERS) - @echo Building core container - ${container_builder} ${core_container_srcdir} $@ - -CONTAINERS_SUDO += $(CORE_CONTAINER) \ No newline at end of file diff --git a/build/containers/core/get_field.sh b/build/containers/core/get_field.sh deleted file mode 100755 index 5df9d95b..00000000 --- a/build/containers/core/get_field.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -set -e - -input=$1 -field=$2 - -C=1 -header=`zcat ${input} | head -n 1` -for i in ${header} -do - if [[ $i == "${field}" ]] - then - break - else - C=$(( $C + 1 )) - fi -done - -zcat ${input} | sed -n '2p' | cut -f$C | xargs printf \ No newline at end of file diff --git a/build/containers/deconstructsig/Singularity b/build/containers/deconstructsig/Singularity deleted file mode 100644 index 2caeb385..00000000 --- a/build/containers/deconstructsig/Singularity +++ /dev/null @@ -1,25 +0,0 @@ -Bootstrap: docker -From: debian:buster-slim - -%environment - export LC_ALL=C.UTF-8 - -%runscript - exec "/usr/bin/python3" "/deconstructsig/run_deconstruct.py" "$@" - -%setup - mkdir ${SINGULARITY_ROOTFS}/deconstructsig - cp run_deconstruct.py ${SINGULARITY_ROOTFS}/deconstructsig/ - cp deconstructSigs.r ${SINGULARITY_ROOTFS}/deconstructsig/ - cp signature_assignment.py ${SINGULARITY_ROOTFS}/deconstructsig/ - cp output_pass_drivers_01.csv ${SINGULARITY_ROOTFS}/deconstructsig/ - chmod -R a+rx ${SINGULARITY_ROOTFS}/deconstructsig/ - -%post - apt-get update - apt-get install -y python3-pandas python3-click r-bioc-bsgenome r-cran-gplots procps - rm -rf /var/lib/apt/lists/* - echo "install.packages(\"BiocManager\", repos=\"https://cran.r-project.org\")" | R --no-save - echo "BiocManager::install(\"BSgenome.Hsapiens.UCSC.hg19\")" | R --no-save - echo "BiocManager::install(\"BSgenome.Hsapiens.UCSC.hg38\")" | R --no-save - echo "install.packages(\"deconstructSigs\", repos=\"https://cran.r-project.org\")" | R --no-save diff --git a/build/containers/deconstructsig/deconstructsigs.mk b/build/containers/deconstructsig/deconstructsigs.mk deleted file mode 100644 index 9e023a93..00000000 --- a/build/containers/deconstructsig/deconstructsigs.mk +++ /dev/null @@ -1,12 +0,0 @@ - -DECONSTRUCTSIGS_CONTAINER = $(INTOGEN_CONTAINERS)/deconstructsigs.simg - -deconstructsigs_container_srcdir = ${src_containers}/deconstructsig - -deconstructsigs_container_src = $(wildcard ${deconstructsigs_container_srcdir}/*) - -$(DECONSTRUCTSIGS_CONTAINER): $(deconstructsigs_container_src) | $(INTOGEN_CONTAINERS) - @echo Building deconstructSigs container - ${container_builder} ${deconstructsigs_container_srcdir} $@ - -CONTAINERS_SUDO += 
$(DECONSTRUCTSIGS_CONTAINER) \ No newline at end of file diff --git a/build/containers/dndscv/Singularity b/build/containers/dndscv/Singularity deleted file mode 100644 index 882c1498..00000000 --- a/build/containers/dndscv/Singularity +++ /dev/null @@ -1,22 +0,0 @@ -Bootstrap: docker -From: debian:buster-slim - -%environment - export LC_ALL=C.UTF-8 - -%runscript - exec "Rscript" "/dndscv/dndscv.R" "$@" - -%setup - mkdir ${SINGULARITY_ROOTFS}/dndscv - cp dndscv.R ${SINGULARITY_ROOTFS}/dndscv/ - cp dndscv.tar.gz ${SINGULARITY_ROOTFS}/dndscv/ - chmod -R a+rx ${SINGULARITY_ROOTFS}/dndscv/ - -%post - apt-get update - apt-get -y install locales-all r-base r-cran-devtools r-bioc-biostrings r-bioc-genomicranges r-bioc-rsamtools r-cran-ade4 r-cran-seqinr procps - rm -rf /var/lib/apt/lists/* - # echo "library(devtools); install_github('im3sanger/dndscv')" | R --no-save - echo "library(devtools)" | R --no-save - R CMD INSTALL "/dndscv/dndscv.tar.gz" diff --git a/build/containers/dndscv/dndscv.mk b/build/containers/dndscv/dndscv.mk deleted file mode 100644 index bd38d28e..00000000 --- a/build/containers/dndscv/dndscv.mk +++ /dev/null @@ -1,14 +0,0 @@ - -DNDSCV_CONTAINER = $(INTOGEN_CONTAINERS)/dndscv.simg - -dndscv_container_srcdir = ${src_containers}/dndscv - -dndscv_container_src = ${dndscv_container_srcdir}/dndscv.R \ - ${dndscv_container_srcdir}/dndscv.tar.gz \ - ${dndscv_container_srcdir}/Singularity - -$(DNDSCV_CONTAINER): $(dndscv_container_src) | $(INTOGEN_CONTAINERS) - @echo Building dNdScv container - ${container_builder} ${dndscv_container_srcdir} $@ - -CONTAINERS_SUDO += $(DNDSCV_CONTAINER) \ No newline at end of file diff --git a/build/containers/dndscv/dndscv.tar.gz b/build/containers/dndscv/dndscv.tar.gz deleted file mode 100644 index 1225dd9e..00000000 --- a/build/containers/dndscv/dndscv.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c00bfd80ace2a1d5e38afa4031131ea44eecce9bb83f5502f0085dd2fe560db1 -size 114417210 diff --git a/build/containers/dndscv/dndscv.tar.gz.REMOVED.git-id b/build/containers/dndscv/dndscv.tar.gz.REMOVED.git-id deleted file mode 100644 index 7801ab9d..00000000 --- a/build/containers/dndscv/dndscv.tar.gz.REMOVED.git-id +++ /dev/null @@ -1 +0,0 @@ -ef1e05dfea6c66e5735c740499a5d9c5ed2990a6 \ No newline at end of file diff --git a/build/containers/hotmaps/Singularity b/build/containers/hotmaps/Singularity deleted file mode 100644 index 81090667..00000000 --- a/build/containers/hotmaps/Singularity +++ /dev/null @@ -1,19 +0,0 @@ -Bootstrap: docker -From: alpine:3.8 - -%environment - export LC_ALL=C.UTF-8 - -%runscript - exec "/bin/sh" "/hotmaps/hotmaps.sh" "$@" - -%setup - mkdir ${SINGULARITY_ROOTFS}/hotmaps - cp -r scripts ${SINGULARITY_ROOTFS}/hotmaps/ - cp hotmaps.sh ${SINGULARITY_ROOTFS}/hotmaps/ - chmod -R a+r,a+x ${SINGULARITY_ROOTFS}/hotmaps/ - -%post - apk add --no-cache python-dev py-pip py-curl zlib-dev make ncurses-dev g++ bash procps - pip --no-cache-dir install numpy==1.16.5 - pip --no-cache-dir install pandas==0.24.2 tqdm futures bgreference biopython==1.76 pyliftover diff --git a/build/containers/hotmaps/hotmaps.mk b/build/containers/hotmaps/hotmaps.mk deleted file mode 100644 index 588ab917..00000000 --- a/build/containers/hotmaps/hotmaps.mk +++ /dev/null @@ -1,13 +0,0 @@ - -HOTMAPS_CONTAINER = $(INTOGEN_CONTAINERS)/hotmaps.simg - -hotmaps_container_srcdir = ${src_containers}/hotmaps - -hotmaps_container_src = $(wildcard ${hotmaps_container_srcdir}/*) \ - ${hotmaps_container_srcdir}/hotmaps.sh 
${hotmaps_container_srcdir}/Singularity - -$(HOTMAPS_CONTAINER): $(hotmaps_container_src) | $(INTOGEN_CONTAINERS) - @echo Building HotMAPS container - ${container_builder} ${hotmaps_container_srcdir} $@ - -CONTAINERS_SUDO += $(HOTMAPS_CONTAINER) \ No newline at end of file diff --git a/build/containers/hotmaps/hotmaps.sh b/build/containers/hotmaps/hotmaps.sh deleted file mode 100755 index 62577e52..00000000 --- a/build/containers/hotmaps/hotmaps.sh +++ /dev/null @@ -1,137 +0,0 @@ -#!/bin/sh -set -x - -# Script arguments -INPUT_FILE=$1 -OUTPUT_FOLDER=$2 -SIGNATURE_FILE=$3 -DATA_FOLDER=$4 -CORES=$5 - -# Enviroment variables -HYPERMUT=1000 -SCRIPTS_FOLDER="/hotmaps/scripts" - -DATASETS_FOLDER=$INTOGEN_DATASETS/hotmaps - - -# Preprocess -input_folder=$(dirname "${INPUT_FILE}") -input_name=$(basename "${INPUT_FILE}") -name=${input_name%.in.maf} - -TEMP_FOLDER="$OUTPUT_FOLDER/${name}.tmp" -mkdir -p $TEMP_FOLDER - -input_filename=${name}.maf -cp ${INPUT_FILE} ${TEMP_FOLDER}/${input_filename} - - -# TODO remove the file checks - -## STEP1. Map to Structure (output: non_filtered_mupit.INPUT_FILENAME) -if [ ! -f "$TEMP_FOLDER/non_filtered_mupit.${input_filename}" ] -then - python $SCRIPTS_FOLDER/map_maf_to_structure.py \ - --data-dir ${TEMP_FOLDER} \ - --match-regex ${input_filename} \ - --output-dir $TEMP_FOLDER \ - --database ${DATA_FOLDER}/mupit_database.db - rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi -fi - -## STEP2. Convert MAF to MUPIT (output: coverage_info.txt, INPUT_FILENAME.mupit ) -if [ ! -f "$TEMP_FOLDER/${input_filename}.mupit" ] -then - python $SCRIPTS_FOLDER/convert_maf_to_mupit.py \ - --maf ${TEMP_FOLDER}/${input_filename} \ - --tumor-type ${name} \ - --no-stratify \ - -mt $HYPERMUT \ - -i $TEMP_FOLDER \ - --output $TEMP_FOLDER/${input_filename}.mupit \ - --database ${DATA_FOLDER}/mupit_database.db - rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi -fi - -## STEP3. Filter hypermutated (output: mupit.INPUT_FILENAME) -if [ ! -f "$TEMP_FOLDER/mupit.${input_filename}" ] -then - python $SCRIPTS_FOLDER/filter_hypermutated.py \ - --raw-dir $TEMP_FOLDER \ - --match-regex ${input_filename} \ - --mut-threshold $HYPERMUT \ - --sample-col 'Tumor_Sample_Barcode' \ - --data-dir $TEMP_FOLDER - rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi -fi - -## STEP4. Count mutations (input: mupit.* output: collected.INPUT_FILENAME) -if [ ! -f "$TEMP_FOLDER/collected.${input_filename}" ] -then - python $SCRIPTS_FOLDER/count_mutations.py \ - --data-dir $TEMP_FOLDER - rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi -fi - -## STEP5. Format mutations table (input: collected.* output: mutation_tcga.INPUT_FILENAME.txt) -if [ ! -f "$TEMP_FOLDER/mutation_tcga.${input_filename}" ] -then - python $SCRIPTS_FOLDER/format_mutations_table.py \ - --data-dir $TEMP_FOLDER - rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi -fi - -## STEP7. Run HotMAPS (input: mutation_tcga.NAME.txt output:hotspot_INPUT_FILENAME) -if [ ! -f "$TEMP_FOLDER/hotspot_${input_filename}" ] -then - python $SCRIPTS_FOLDER/hotspot.py \ - --log-level=INFO \ - -m $TEMP_FOLDER/mutation_tcga.${name}.txt \ - -a ${DATA_FOLDER}/fully_described_pdb_info.txt \ - -t EVERY -n 10000 -r 10.0 -c $CORES \ - -o $TEMP_FOLDER/hotspot_${input_filename} \ - -e $TEMP_FOLDER/${input_filename}.err --log=stdout \ - -gc $DATASETS_FOLDER/coordinates.txt.gz \ - -S $SIGNATURE_FILE \ - --maf ${TEMP_FOLDER}/${input_filename} \ - --database ${DATA_FOLDER}/mupit_database.db \ - --pdb ${DATA_FOLDER}/pdb - rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi -fi - -## STEP8. Multiple test -if [ ! 
-f "$TEMP_FOLDER/mtco_${input_filename}" ] -then - python $SCRIPTS_FOLDER/multiple_testing_correction.py \ - -i $TEMP_FOLDER/hotspot_${input_filename} \ - -f min -q 0.05 \ - -m $TEMP_FOLDER/${input_filename}.mupit \ - -o $TEMP_FOLDER/mtco_${input_filename} \ - -s $TEMP_FOLDER/mtcs_${input_filename} - rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi -fi - -## STEP9. Find Hotspots regions gene -if [ ! -f "$TEMP_FOLDER/hotspot_gene_${input_filename}" ] -then - python $SCRIPTS_FOLDER/find_hotspot_regions_gene.py \ - -m $TEMP_FOLDER/mtco_${input_filename} \ - -a $TEMP_FOLDER/${input_filename}.mupit \ - -p ${DATA_FOLDER}/fully_described_pdb_info.txt \ - -r 10.0 -q 0.05 \ - -o $TEMP_FOLDER/hotspot_gene_${input_filename} \ - --pdb ${DATA_FOLDER}/pdb - rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi -fi - -## STEP10. Output parser -if [ ! -f "$OUTPUT_FOLDER/${name}.out.gz" ] -then - python ${SCRIPTS_FOLDER}/postprocess.py \ - ${TEMP_FOLDER}/hotspot_gene_${input_filename} \ - ${TEMP_FOLDER}/mtco_${input_filename} \ - ${OUTPUT_FOLDER}/${name}.out.gz \ - $OUTPUT_FOLDER/${name}.clusters.gz - rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi -fi \ No newline at end of file diff --git a/build/containers/cbase/Auxiliary/CBaSE_v1.1_parameters.py b/build/containers/intogen-cbase/Auxiliary/CBaSE_v1.1_parameters.py similarity index 100% rename from build/containers/cbase/Auxiliary/CBaSE_v1.1_parameters.py rename to build/containers/intogen-cbase/Auxiliary/CBaSE_v1.1_parameters.py diff --git a/build/containers/cbase/Auxiliary/CBaSE_v1.1_qvalues.py b/build/containers/intogen-cbase/Auxiliary/CBaSE_v1.1_qvalues.py similarity index 100% rename from build/containers/cbase/Auxiliary/CBaSE_v1.1_qvalues.py rename to build/containers/intogen-cbase/Auxiliary/CBaSE_v1.1_qvalues.py diff --git a/build/containers/intogen-cbase/Dockerfile b/build/containers/intogen-cbase/Dockerfile new file mode 100644 index 00000000..497eec0f --- /dev/null +++ b/build/containers/intogen-cbase/Dockerfile @@ -0,0 +1,20 @@ +FROM debian:buster-slim + +ENV LC_ALL=C.UTF-8 +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get install -y --no-install-recommends --no-install-suggests \ + python2.7=2.7.16-2+deb10u4 \ + python-scipy=1.1.0-7 \ + python-statsmodels=0.8.0-9 \ + python-mpmath=1.1.0-1 && \ + rm -rf /var/lib/apt/lists/* + +RUN --mount=type=bind,target=/files \ + mkdir -p /cbase && \ + cp -a /files/Auxiliary /cbase/ && \ + cp /files/cbase.py /cbase/ && \ + chmod -R a+xr /cbase + +ENTRYPOINT ["/usr/bin/python", "/cbase/cbase.py"] diff --git a/build/containers/cbase/cbase.py b/build/containers/intogen-cbase/cbase.py similarity index 100% rename from build/containers/cbase/cbase.py rename to build/containers/intogen-cbase/cbase.py diff --git a/build/containers/intogen-combination b/build/containers/intogen-combination new file mode 120000 index 00000000..26b8d221 --- /dev/null +++ b/build/containers/intogen-combination @@ -0,0 +1 @@ +../../combination \ No newline at end of file diff --git a/build/containers/intogen-core b/build/containers/intogen-core new file mode 120000 index 00000000..3d25ddeb --- /dev/null +++ b/build/containers/intogen-core @@ -0,0 +1 @@ +../../core \ No newline at end of file diff --git a/build/containers/intogen-deconstructsig/Dockerfile b/build/containers/intogen-deconstructsig/Dockerfile new file mode 100644 index 00000000..fad9d38f --- /dev/null +++ b/build/containers/intogen-deconstructsig/Dockerfile @@ -0,0 +1,36 @@ +FROM debian:buster-slim + +ENV LC_ALL=C.UTF-8 +ENV DEBIAN_FRONTEND=noninteractive + +RUN 
apt-get update && \ + apt-get install -y --no-install-recommends --no-install-suggests \ + python3.7-minimal=3.7.3-2+deb10u7 \ + python3-pandas=0.23.3+dfsg-3 \ + python3-click=7.0-1 \ + r-bioc-bsgenome=1.50.0-1 \ + r-cran-gplots=3.0.1.1-1 \ + procps=2:3.3.15-2 && \ + rm -rf /var/lib/apt/lists/* + +RUN R --vanilla <>> library("dndscv") ++>>> muts = read.table(gzfile("/workspace/projects/intogen_2017/runs/20190220/work/2d/9b30aa1d8a871b00fc11169696744a/dndscv/TARGET_WGS_WT_US.in.gz"), sep = '\t', header = TRUE) ++>>> dndscv(muts, refdb=file.path(Sys.getenv("INTOGEN_DATASETS"), "dndscv", "RefCDS.rda")) ++ ++ ++ ++writeOutput = 0 ++tryCatch({ ++ result <- dndscv(muts, refdb=file.path(Sys.getenv("INTOGEN_DATASETS"), "dndscv", "RefCDS.rda")) ++ writeOutput <- 1 ++}, error=function(e) { ++ message('dndsCV raised an error') ++ writeOutput <- 2 ++}) ++ ++ ++ ++write.table(result$sel_cv, gzfile(args[2]), quote=FALSE, sep='\t', row.names = FALSE) ++write.table(result$annotmuts, gzfile(args[3]), sep = "\t", quote = FALSE, row.names = FALSE) ++write.table(result$genemuts, gzfile(args[4]), sep = "\t", quote = FALSE, row.names = FALSE) +diff --git a/R/dndscv.R b/R/dndscv.R +index 8c86403..fe3cdb7 100755 +--- a/R/dndscv.R ++++ b/R/dndscv.R +@@ -399,6 +399,10 @@ dndscv = function(mutations, gene_list = NULL, refdb = "hg19", sm = "192r_3w", k + if (outp > 2) { + + message("[5] Running dNdScv...") ++ ++ if (sum(genemuts$n_syn) < 5) { ++ stop('Less then 5 synonymous mutations') ++ } + + # Covariates + if (is.null(cv)) { +@@ -412,17 +416,26 @@ dndscv = function(mutations, gene_list = NULL, refdb = "hg19", sm = "192r_3w", k + covs = covs[,1:maxcovs] + } + nbrdf = cbind(genemuts[,c("n_syn","exp_syn")], covs) +- ++ + # Negative binomial regression for substitutions + if (nrow(genemuts)<500) { # If there are <500 genes, we run the regression without covariates + model = MASS::glm.nb(n_syn ~ offset(log(exp_syn)) - 1 , data = nbrdf) + } else { + model = tryCatch({ ++ message('--normal') + MASS::glm.nb(n_syn ~ offset(log(exp_syn)) + . 
, data = nbrdf) # We try running the model with covariates + }, warning = function(w){ ++ message('--warning') + MASS::glm.nb(n_syn ~ offset(log(exp_syn)) - 1 , data = nbrdf) # If there are warnings or errors we run the model without covariates + }, error = function(e){ ++ message('--error') + MASS::glm.nb(n_syn ~ offset(log(exp_syn)) - 1 , data = nbrdf) # If there are warnings or errors we run the model without covariates ++ ++ # tryCatch({ ++ # MASS::glm.nb(n_syn ~ offset(log(exp_syn)) - 1 , data = nbrdf) # If there are warnings or errors we run the model without covariates ++ # }, error = function(e){ ++ # stop('MASS::gml.nb ERROR') ++ # }) + }) + } + message(sprintf(" Regression model for substitutions (theta = %0.3g).", model$theta)) +diff --git a/run_dndscv.R b/run_dndscv.R +new file mode 100644 +index 0000000..94c61c7 +--- /dev/null ++++ b/run_dndscv.R +@@ -0,0 +1,81 @@ ++ ++library("dndscv") ++ ++muts = read.table(gzfile('/workspace/projects/intogen_2017/runs/20190124/dndscv/CBIOP_WXS_MDS_TOKYO_2011.in.gz'), sep = '\t', header = TRUE) ++ ++writeOutput = 0 ++result = tryCatch({ ++ dndscv(muts, refdb="/workspace/projects/intogen_2017/pipeline/datasets/hg38_vep92_develop/dndscv/RefCDS.rda") ++ writeOutput = 1 ++}, error=function(e) { ++ message('There was an error') ++ writeOutput = 2 ++}) ++ ++if (writeOutput == 1) { ++ write.table(result$sel_cv, 'file1.txt', quote=FALSE, sep='\t', row.names = FALSE) ++ write.table(result$annotmuts, 'file2.txt', sep = "\t", quote = FALSE, row.names = FALSE) ++ write.table(result$genemuts, 'file3.txt', sep = "\t", quote = FALSE, row.names = FALSE) ++} else { ++ df = data.frame( ++ gene_name=character(), ++ n_syn=character(), ++ n_mis=character(), ++ n_non=character(), ++ n_spl=character(), ++ n_ind=character(), ++ wmis_cv=character(), ++ wnon_cv=character(), ++ wspl_cv=character(), ++ wind_cv=character(), ++ pmis_cv=character(), ++ ptrunc_cv=character(), ++ pallsubs_cv=character(), ++ pind_cv=character(), ++ qmis_cv=character(), ++ qtrunc_cv=character(), ++ qallsubs_cv=character(), ++ pglobal_cv=character(), ++ qglobal_cv=character(), ++ stringsAsFactors=FALSE ++ ) ++ write.table(df, 'file1.txt', sep = "\t", quote = FALSE, row.names = FALSE) ++ ++ df = data.frame( ++ sampleID=character(), ++ chr=character(), ++ pos=character(), ++ ref=character(), ++ mut=character(), ++ gene=character(), ++ strand=character(), ++ ref_cod=character(), ++ mut_cod=character(), ++ ref3_cod=character(), ++ mut3_cod=character(), ++ aachange=character(), ++ ntchange=character(), ++ codonsub=character(), ++ impact=character(), ++ pid=character(), ++ stringsAsFactors=FALSE ++ ) ++ write.table(df, 'file2.txt', sep = "\t", quote = FALSE, row.names = FALSE) ++ ++ df = data.frame( ++ gene_name=character(), ++ n_syn=character(), ++ n_mis=character(), ++ n_non=character(), ++ n_spl=character(), ++ exp_syn=character(), ++ exp_mis=character(), ++ exp_non=character(), ++ exp_spl=character(), ++ exp_syn_cv=character(), ++ stringsAsFactors=FALSE ++ ) ++ write.table(df, 'file3.txt', sep = "\t", quote = FALSE, row.names = FALSE) ++} ++ ++ diff --git a/build/containers/intogen-hotmaps/Dockerfile b/build/containers/intogen-hotmaps/Dockerfile new file mode 100644 index 00000000..c495969c --- /dev/null +++ b/build/containers/intogen-hotmaps/Dockerfile @@ -0,0 +1,26 @@ +FROM alpine:3.8 + +ENV LC_ALL=C.UTF-8 +ENV DEBIAN_FRONTEND=noninteractive + +RUN apk add --no-cache \ + python-dev=2.7.15-r3 \ + py-pip=10.0.1-r0 \ + py-curl=7.43.0-r5 \ + zlib-dev=1.2.11-r1 \ + make=4.2.1-r2 \ + 
ncurses-dev=6.1_p20180818-r1 \ + g++=6.4.0-r9 \ + bash=4.4.19-r1 \ + procps=3.3.15-r0 + +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install numpy==1.16.5 && \ + pip install pandas==0.24.2 tqdm==4.64.1 futures==3.4.0 bgreference==0.6 biopython==1.76 pyliftover==0.4.1 + +RUN --mount=type=bind,target=/files \ + mkdir -p /hotmaps && \ + cp -a /files/hotmaps.sh /files/scripts /hotmaps/ && \ + chmod -R a+r,a+x /hotmaps/ + +ENTRYPOINT [ "/bin/sh", "/hotmaps/hotmaps.sh" ] diff --git a/build/containers/intogen-hotmaps/hotmaps.sh b/build/containers/intogen-hotmaps/hotmaps.sh new file mode 100755 index 00000000..ae755cd8 --- /dev/null +++ b/build/containers/intogen-hotmaps/hotmaps.sh @@ -0,0 +1,144 @@ +#!/bin/bash +set -x + +# Script arguments +INPUT_FILE=$1 +OUTPUT_FOLDER=$2 +SIGNATURE_FILE=$3 +DATA_FOLDER=$4 +CORES=$5 + +# Enviroment variables +HYPERMUT=1000 +SCRIPTS_FOLDER="/hotmaps/scripts" + +DATASETS_FOLDER=$INTOGEN_DATASETS/hotmaps + +# Functions + +exit_on_error() { + exit_code=$1 + if [ "${exit_code}" != "0" ]; then + exit "${exit_code}" + fi +} + +# Preprocess +input_name=$(basename "${INPUT_FILE}") +name=${input_name%.in.maf} + +TEMP_FOLDER="$OUTPUT_FOLDER/${name}.tmp" +mkdir -p "${TEMP_FOLDER}" + +input_filename=${name}.maf +cp "${INPUT_FILE}" "${TEMP_FOLDER}/${input_filename}" + + +# TODO remove the file checks + +## STEP1. Map to Structure (output: non_filtered_mupit.INPUT_FILENAME) +if [ ! -f "${TEMP_FOLDER}/non_filtered_mupit.${input_filename}" ] +then + python $SCRIPTS_FOLDER/map_maf_to_structure.py \ + --data-dir "${TEMP_FOLDER}" \ + --match-regex "${input_filename}" \ + --output-dir "${TEMP_FOLDER}" \ + --database "${DATA_FOLDER}/mupit_database.db" + exit_on_error $? +fi + +## STEP2. Convert MAF to MUPIT (output: coverage_info.txt, INPUT_FILENAME.mupit ) +if [ ! -f "${TEMP_FOLDER}/${input_filename}.mupit" ] +then + python $SCRIPTS_FOLDER/convert_maf_to_mupit.py \ + --maf "${TEMP_FOLDER}/${input_filename}" \ + --tumor-type "${name}" \ + --no-stratify \ + -mt "${HYPERMUT}" \ + -i "${TEMP_FOLDER}" \ + --output "${TEMP_FOLDER}/${input_filename}.mupit" \ + --database "${DATA_FOLDER}/mupit_database.db" + exit_on_error $? +fi + +## STEP3. Filter hypermutated (output: mupit.INPUT_FILENAME) +if [ ! -f "${TEMP_FOLDER}/mupit.${input_filename}" ] +then + python $SCRIPTS_FOLDER/filter_hypermutated.py \ + --raw-dir "${TEMP_FOLDER}" \ + --match-regex "${input_filename}" \ + --mut-threshold "${HYPERMUT}" \ + --sample-col 'Tumor_Sample_Barcode' \ + --data-dir "${TEMP_FOLDER}" + exit_on_error $? +fi + +## STEP4. Count mutations (input: mupit.* output: collected.INPUT_FILENAME) +if [ ! -f "${TEMP_FOLDER}/collected.${input_filename}" ] +then + python "${SCRIPTS_FOLDER}/count_mutations.py" \ + --data-dir "${TEMP_FOLDER}" + exit_on_error $? +fi + +## STEP5. Format mutations table (input: collected.* output: mutation_tcga.INPUT_FILENAME.txt) +if [ ! -f "${TEMP_FOLDER}/mutation_tcga.${input_filename}" ] +then + python "${SCRIPTS_FOLDER}/format_mutations_table.py" \ + --data-dir "${TEMP_FOLDER}" + exit_on_error $? +fi + +## STEP7. Run HotMAPS (input: mutation_tcga.NAME.txt output:hotspot_INPUT_FILENAME) +if [ ! 
-f "${TEMP_FOLDER}/hotspot_${input_filename}" ] +then + python "${SCRIPTS_FOLDER}/hotspot.py" \ + --log-level=INFO \ + -m "${TEMP_FOLDER}/mutation_tcga.${name}.txt" \ + -a "${DATA_FOLDER}/fully_described_pdb_info.txt" \ + -t EVERY -n 10000 -r 10.0 -c "${CORES}" \ + -o "${TEMP_FOLDER}/hotspot_${input_filename}" \ + -e "${TEMP_FOLDER}/${input_filename}.err --log=stdout" \ + -gc "${DATASETS_FOLDER}/coordinates.txt.gz" \ + -S "${SIGNATURE_FILE}" \ + --maf "${TEMP_FOLDER}/${input_filename}" \ + --database "${DATA_FOLDER}/mupit_database.db" \ + --pdb "${DATA_FOLDER}/pdb" + exit_on_error $? +fi + +## STEP8. Multiple test +if [ ! -f "${TEMP_FOLDER}/mtco_${input_filename}" ] +then + python "${SCRIPTS_FOLDER}/multiple_testing_correction.py" \ + -i "${TEMP_FOLDER}/hotspot_${input_filename}" \ + -f min -q 0.05 \ + -m "${TEMP_FOLDER}/${input_filename}.mupit" \ + -o "${TEMP_FOLDER}/mtco_${input_filename}" \ + -s "${TEMP_FOLDER}/mtcs_${input_filename}" + exit_on_error $? +fi + +## STEP9. Find Hotspots regions gene +if [ ! -f "${TEMP_FOLDER}/hotspot_gene_${input_filename}" ] +then + python "${SCRIPTS_FOLDER}/find_hotspot_regions_gene.py" \ + -m "${TEMP_FOLDER}/mtco_${input_filename}" \ + -a "${TEMP_FOLDER}/${input_filename}.mupit" \ + -p "${DATA_FOLDER}/fully_described_pdb_info.txt" \ + -r 10.0 -q 0.05 \ + -o "${TEMP_FOLDER}/hotspot_gene_${input_filename}" \ + --pdb "${DATA_FOLDER}/pdb" + exit_on_error $? +fi + +## STEP10. Output parser +if [ ! -f "$OUTPUT_FOLDER/${name}.out.gz" ] +then + python "${SCRIPTS_FOLDER}/postprocess.py" \ + "${TEMP_FOLDER}/hotspot_gene_${input_filename}" \ + "${TEMP_FOLDER}/mtco_${input_filename}" \ + "${OUTPUT_FOLDER}/${name}.out.gz" \ + "${OUTPUT_FOLDER}/${name}.clusters.gz" + exit_on_error $? +fi diff --git a/build/containers/hotmaps/scripts/convert_maf_to_mupit.py b/build/containers/intogen-hotmaps/scripts/convert_maf_to_mupit.py similarity index 100% rename from build/containers/hotmaps/scripts/convert_maf_to_mupit.py rename to build/containers/intogen-hotmaps/scripts/convert_maf_to_mupit.py diff --git a/build/containers/hotmaps/scripts/count_mutations.py b/build/containers/intogen-hotmaps/scripts/count_mutations.py similarity index 100% rename from build/containers/hotmaps/scripts/count_mutations.py rename to build/containers/intogen-hotmaps/scripts/count_mutations.py diff --git a/build/containers/hotmaps/scripts/filter_hypermutated.py b/build/containers/intogen-hotmaps/scripts/filter_hypermutated.py similarity index 100% rename from build/containers/hotmaps/scripts/filter_hypermutated.py rename to build/containers/intogen-hotmaps/scripts/filter_hypermutated.py diff --git a/build/containers/hotmaps/scripts/find_hotspot_regions_gene.py b/build/containers/intogen-hotmaps/scripts/find_hotspot_regions_gene.py similarity index 100% rename from build/containers/hotmaps/scripts/find_hotspot_regions_gene.py rename to build/containers/intogen-hotmaps/scripts/find_hotspot_regions_gene.py diff --git a/build/containers/hotmaps/scripts/format_mutations_table.py b/build/containers/intogen-hotmaps/scripts/format_mutations_table.py similarity index 100% rename from build/containers/hotmaps/scripts/format_mutations_table.py rename to build/containers/intogen-hotmaps/scripts/format_mutations_table.py diff --git a/build/containers/hotmaps/scripts/get_hotspot_residues.py b/build/containers/intogen-hotmaps/scripts/get_hotspot_residues.py similarity index 100% rename from build/containers/hotmaps/scripts/get_hotspot_residues.py rename to 
build/containers/intogen-hotmaps/scripts/get_hotspot_residues.py diff --git a/build/containers/hotmaps/scripts/hotspot.py b/build/containers/intogen-hotmaps/scripts/hotspot.py similarity index 100% rename from build/containers/hotmaps/scripts/hotspot.py rename to build/containers/intogen-hotmaps/scripts/hotspot.py diff --git a/build/containers/hotmaps/scripts/maf_utils.py b/build/containers/intogen-hotmaps/scripts/maf_utils.py similarity index 100% rename from build/containers/hotmaps/scripts/maf_utils.py rename to build/containers/intogen-hotmaps/scripts/maf_utils.py diff --git a/build/containers/hotmaps/scripts/map_maf_to_structure.py b/build/containers/intogen-hotmaps/scripts/map_maf_to_structure.py similarity index 100% rename from build/containers/hotmaps/scripts/map_maf_to_structure.py rename to build/containers/intogen-hotmaps/scripts/map_maf_to_structure.py diff --git a/build/containers/hotmaps/scripts/multiple_testing_correction.py b/build/containers/intogen-hotmaps/scripts/multiple_testing_correction.py similarity index 100% rename from build/containers/hotmaps/scripts/multiple_testing_correction.py rename to build/containers/intogen-hotmaps/scripts/multiple_testing_correction.py diff --git a/build/containers/hotmaps/scripts/postprocess.py b/build/containers/intogen-hotmaps/scripts/postprocess.py similarity index 100% rename from build/containers/hotmaps/scripts/postprocess.py rename to build/containers/intogen-hotmaps/scripts/postprocess.py diff --git a/build/containers/hotmaps/scripts/src/__init__.py b/build/containers/intogen-hotmaps/scripts/src/__init__.py similarity index 100% rename from build/containers/hotmaps/scripts/src/__init__.py rename to build/containers/intogen-hotmaps/scripts/src/__init__.py diff --git a/build/containers/hotmaps/scripts/src/density.py b/build/containers/intogen-hotmaps/scripts/src/density.py similarity index 100% rename from build/containers/hotmaps/scripts/src/density.py rename to build/containers/intogen-hotmaps/scripts/src/density.py diff --git a/build/containers/hotmaps/scripts/src/graph.py b/build/containers/intogen-hotmaps/scripts/src/graph.py similarity index 100% rename from build/containers/hotmaps/scripts/src/graph.py rename to build/containers/intogen-hotmaps/scripts/src/graph.py diff --git a/build/containers/hotmaps/scripts/src/mutations.py b/build/containers/intogen-hotmaps/scripts/src/mutations.py similarity index 100% rename from build/containers/hotmaps/scripts/src/mutations.py rename to build/containers/intogen-hotmaps/scripts/src/mutations.py diff --git a/build/containers/hotmaps/scripts/src/pdb_structure.py b/build/containers/intogen-hotmaps/scripts/src/pdb_structure.py similarity index 100% rename from build/containers/hotmaps/scripts/src/pdb_structure.py rename to build/containers/intogen-hotmaps/scripts/src/pdb_structure.py diff --git a/build/containers/hotmaps/scripts/src/randomizer_aa.py b/build/containers/intogen-hotmaps/scripts/src/randomizer_aa.py similarity index 100% rename from build/containers/hotmaps/scripts/src/randomizer_aa.py rename to build/containers/intogen-hotmaps/scripts/src/randomizer_aa.py diff --git a/build/containers/hotmaps/scripts/src/simulate_mutations_signatures.py b/build/containers/intogen-hotmaps/scripts/src/simulate_mutations_signatures.py similarity index 100% rename from build/containers/hotmaps/scripts/src/simulate_mutations_signatures.py rename to build/containers/intogen-hotmaps/scripts/src/simulate_mutations_signatures.py diff --git 
a/build/containers/hotmaps/scripts/src/simulation_signatures.py b/build/containers/intogen-hotmaps/scripts/src/simulation_signatures.py similarity index 100% rename from build/containers/hotmaps/scripts/src/simulation_signatures.py rename to build/containers/intogen-hotmaps/scripts/src/simulation_signatures.py diff --git a/build/containers/hotmaps/scripts/src/statistics.py b/build/containers/intogen-hotmaps/scripts/src/statistics.py similarity index 100% rename from build/containers/hotmaps/scripts/src/statistics.py rename to build/containers/intogen-hotmaps/scripts/src/statistics.py diff --git a/build/containers/hotmaps/scripts/src/utils.py b/build/containers/intogen-hotmaps/scripts/src/utils.py similarity index 100% rename from build/containers/hotmaps/scripts/src/utils.py rename to build/containers/intogen-hotmaps/scripts/src/utils.py diff --git a/build/containers/intogen-mutpanning/Dockerfile b/build/containers/intogen-mutpanning/Dockerfile new file mode 100644 index 00000000..0d681cbe --- /dev/null +++ b/build/containers/intogen-mutpanning/Dockerfile @@ -0,0 +1,46 @@ +FROM debian:buster-slim AS build + +ENV LC_ALL=C.UTF-8 +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get install -y --no-install-recommends --no-install-suggests \ + openjdk-11-jdk-headless=11.0.23+9-1~deb10u1 \ + git=1:2.20.1-2+deb10u9 && \ + rm -rf /var/lib/apt/lists/* + +RUN git clone https://github.com/genepattern/MutPanning.git --branch v2 && \ + cd MutPanning && \ + javac -cp bin/commons-math3-3.6.1.jar:bin/jdistlib-0.4.5-bin.jar \ + src/AffinityCount_Cosmic.java src/AffinityCount.java src/AlignHG19.java \ + src/CBASE_Solutions.java src/ClusteringEntity.java src/ClusteringPanCancer.java \ + src/ComputeMutationRateClusters_Entities.java src/ComputeSignificance.java \ + src/CountDestructiveMutations.java src/Filter_Step1.java src/Filter_Step2.java \ + src/Filter_Step3.java src/MutPanning.java src/ReformatCBASE.java -d bin && \ + cd bin && \ + printf "Manifest-Version: 1.0\nMain-Class: MutPanning\nClass-Path: commons-math3-3.6.1.jar jdistlib-0.4.5-bin.jar\n" >MANIFEST.MF && \ + jar cfm MutPanning.jar MANIFEST.MF ./*.class && \ + rm ./*.class MANIFEST.MF + +FROM debian:buster-slim AS final + +ENV LC_ALL=C.UTF-8 +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && \ + apt-get install -y --no-install-recommends --no-install-suggests \ + openjdk-11-jre-headless=11.0.23+9-1~deb10u1 \ + python3.7-minimal=3.7.3-2+deb10u7 \ + procps=2:3.3.15-2 && \ + apt-get remove -y python3-pip && \ + rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /mutpanning +COPY --from=build /MutPanning/bin/*.jar /mutpanning/ + +# argument 0: root file, where all the other files can be found +# argument 1: maf file (standard value: root file/MutationsComplete.maf) +# argument 2: sample annotation file (standard value: root file/SamplesComplete.txt) +# argument 3: path to Hg19 folder (standard value root file/Hg19/) +# java -cp MutPanning.jar MutPanning Hg19/ +ENTRYPOINT [ "/usr/bin/java", "-cp", "/mutpanning/MutPanning.jar", "MutPanning" ] diff --git a/build/containers/intogen-oncodriveclustl/Dockerfile b/build/containers/intogen-oncodriveclustl/Dockerfile new file mode 100644 index 00000000..00f94971 --- /dev/null +++ b/build/containers/intogen-oncodriveclustl/Dockerfile @@ -0,0 +1,6 @@ +FROM python:3.12-slim + +RUN --mount=type=cache,target=/root/.cache/pip \ + pip3 install oncodriveclustl==1.1.3 + +ENTRYPOINT [ "/usr/local/bin/oncodriveclustl" ] diff --git a/build/containers/intogen-oncodrivefml/Dockerfile 
b/build/containers/intogen-oncodrivefml/Dockerfile new file mode 100644 index 00000000..97132001 --- /dev/null +++ b/build/containers/intogen-oncodrivefml/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.6-slim + +ENV BBGLAB_HOME=/oncodrivefml + +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install oncodrivefml==2.1.3 + +RUN --mount=type=bind,target=/files \ + mkdir -p /oncodrivefml && \ + cp /files/oncodrivefml_v2.conf /oncodrivefml/ && \ + chmod -R a+r /oncodrivefml + +WORKDIR /oncodrivefml + +ENTRYPOINT [ "/usr/local/bin/oncodrivefml" ] diff --git a/build/containers/oncodrivefml/oncodrivefml_v2.conf b/build/containers/intogen-oncodrivefml/oncodrivefml_v2.conf similarity index 100% rename from build/containers/oncodrivefml/oncodrivefml_v2.conf rename to build/containers/intogen-oncodrivefml/oncodrivefml_v2.conf diff --git a/build/containers/intogen-signature/Dockerfile b/build/containers/intogen-signature/Dockerfile new file mode 100644 index 00000000..bd3e9756 --- /dev/null +++ b/build/containers/intogen-signature/Dockerfile @@ -0,0 +1,4 @@ +FROM python:3.12-slim + +RUN --mount=type=cache,target=/root/.cache/pip \ + pip3 install bgsignature==0.2 diff --git a/build/containers/intogen-smregions/Dockerfile b/build/containers/intogen-smregions/Dockerfile new file mode 100644 index 00000000..95613390 --- /dev/null +++ b/build/containers/intogen-smregions/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.12-slim + +ENV BBGLAB_HOME=/smregions + +RUN --mount=type=cache,target=/root/.cache/pip \ + pip install https://bitbucket.org/bbglab/smregions/get/master.tar.gz + +RUN --mount=type=bind,target=/files \ + mkdir -p /smregions && \ + cp /files/smregions.conf /smregions && \ + chmod -R a+r /smregions + +WORKDIR /smregions + +ENTRYPOINT [ "/usr/local/bin/smregions" ] diff --git a/build/containers/smregions/smregions.conf b/build/containers/intogen-smregions/smregions.conf similarity index 100% rename from build/containers/smregions/smregions.conf rename to build/containers/intogen-smregions/smregions.conf diff --git a/build/containers/intogen-transvar/Dockerfile b/build/containers/intogen-transvar/Dockerfile new file mode 100644 index 00000000..0615b880 --- /dev/null +++ b/build/containers/intogen-transvar/Dockerfile @@ -0,0 +1,10 @@ +FROM zhouwanding/transvar:2.5.9 + +ENV LC_ALL=C.UTF-8 +ENV TRANSVAR_CFG=/data/transvar.cfg +ENV TRANSVAR_DOWNLOAD_DIR=/data + +RUN rm -rf /anno && \ + mkdir -p /data + +ENTRYPOINT [ "/usr/local/bin/transvar" ] diff --git a/build/containers/mutpanning/Singularity b/build/containers/mutpanning/Singularity deleted file mode 100644 index 562c6df1..00000000 --- a/build/containers/mutpanning/Singularity +++ /dev/null @@ -1,32 +0,0 @@ -Bootstrap: docker -From: debian:buster-slim - -%environment - export LC_ALL=C.UTF-8 - -%runscript - exec "/usr/bin/java" "-cp" "/mutpanning/MutPanning.jar" "MutPanning" "$@" - # argument 0: root file, where all the other files can be found - # argument 1: maf file (standard value: root file/MutationsComplete.maf) - # argument 2: sample annotation file (standard value: root file/SamplesComplete.txt) - # argument 3: path to Hg19 folder (standard value root file/Hg19/) - # java -cp MutPanning.jar MutPanning Hg19/ - -%setup - mkdir ${SINGULARITY_ROOTFS}/mutpanning - cp MutPanning.jar ${SINGULARITY_ROOTFS}/mutpanning/ - chmod -R a+rx ${SINGULARITY_ROOTFS}/mutpanning/ - -%post - # Load environment - export LC_ALL=C.UTF-8 - - # Install dependencies - apt update - # apt upgrade -y - apt install -y openjdk-11-jre python3 procps - - # Clean unused things 
- apt-get clean - apt remove -y python3-pip - rm -rf /var/lib/apt/lists/* diff --git a/build/containers/mutpanning/mutpanning.mk b/build/containers/mutpanning/mutpanning.mk deleted file mode 100644 index 9538a61e..00000000 --- a/build/containers/mutpanning/mutpanning.mk +++ /dev/null @@ -1,14 +0,0 @@ - -MUTPANNING_CONTAINER = $(INTOGEN_CONTAINERS)/mutpanning.simg - -mutpanning_container_srcdir = ${src_containers}/mutpanning - -mutpanning_container_src = ${mutpanning_container_srcdir}/MutPanning.jar \ - ${mutpanning_container_srcdir}/Singularity - - -$(MUTPANNING_CONTAINER): $(mutpanning_container_src) | $(INTOGEN_CONTAINERS) - @echo Building MutPanning container - ${container_builder} ${mutpanning_container_srcdir} $@ - -CONTAINERS_SUDO += $(MUTPANNING_CONTAINER) \ No newline at end of file diff --git a/build/containers/oncodriveclustl/Singularity b/build/containers/oncodriveclustl/Singularity deleted file mode 100644 index 239ad5d6..00000000 --- a/build/containers/oncodriveclustl/Singularity +++ /dev/null @@ -1,6 +0,0 @@ -Bootstrap: docker -From: python:3 - -%post - # Install OncodriveCLUSTL - pip3 --no-cache-dir install oncodriveclustl \ No newline at end of file diff --git a/build/containers/oncodriveclustl/clustl.mk b/build/containers/oncodriveclustl/clustl.mk deleted file mode 100644 index 5812dfeb..00000000 --- a/build/containers/oncodriveclustl/clustl.mk +++ /dev/null @@ -1,12 +0,0 @@ - -CLUSTL_CONTAINER = $(INTOGEN_CONTAINERS)/oncodriveclustl.simg - -clustl_container_srcdir = ${src_containers}/oncodriveclustl - -clustl_container_src = ${clustl_container_srcdir}/Singularity - -$(CLUSTL_CONTAINER): $(clustl_container_src) | $(INTOGEN_CONTAINERS) - @echo Building OncodriveCLUSTL container - ${container_builder} ${clustl_container_srcdir} $@ - -CONTAINERS_SUDO += $(CLUSTL_CONTAINER) \ No newline at end of file diff --git a/build/containers/oncodrivefml/Singularity b/build/containers/oncodrivefml/Singularity deleted file mode 100644 index 69a3214f..00000000 --- a/build/containers/oncodrivefml/Singularity +++ /dev/null @@ -1,17 +0,0 @@ -Bootstrap: docker -From: python:3.6 - -%environment - export BBGLAB_HOME=/oncodrivefml - -%runscript - -%setup - mkdir ${SINGULARITY_ROOTFS}/oncodrivefml - cp oncodrivefml_v2.conf ${SINGULARITY_ROOTFS}/oncodrivefml/ - chmod -R a+rx ${SINGULARITY_ROOTFS}/oncodrivefml/ - -%post - - # Install OncodriveFML - pip --no-cache-dir install oncodrivefml diff --git a/build/containers/oncodrivefml/fml.mk b/build/containers/oncodrivefml/fml.mk deleted file mode 100644 index 2d88359f..00000000 --- a/build/containers/oncodrivefml/fml.mk +++ /dev/null @@ -1,13 +0,0 @@ - -FML_CONTAINER = $(INTOGEN_CONTAINERS)/oncodrivefml.simg - -fml_container_srcdir = ${src_containers}/oncodrivefml - -fml_container_src = ${fml_container_srcdir}/oncodrivefml_v2.conf \ - ${fml_container_srcdir}/Singularity - -$(FML_CONTAINER): $(fml_container_src) | $(INTOGEN_CONTAINERS) - @echo Building OncodriveFML container - ${container_builder} ${fml_container_srcdir} $@ - -CONTAINERS_SUDO += $(FML_CONTAINER) \ No newline at end of file diff --git a/build/containers/signature/Singularity b/build/containers/signature/Singularity deleted file mode 100644 index 0c8918cf..00000000 --- a/build/containers/signature/Singularity +++ /dev/null @@ -1,5 +0,0 @@ -Bootstrap: docker -From: python:3 - -%post - pip install bgsignature diff --git a/build/containers/signature/signature.mk b/build/containers/signature/signature.mk deleted file mode 100644 index f9f11d38..00000000 --- 
--- a/build/containers/signature/signature.mk
+++ /dev/null
@@ -1,12 +0,0 @@
-
-SIGNATURE_CONTAINER = $(INTOGEN_CONTAINERS)/signature.simg
-
-signature_container_srcdir = ${src_containers}/signature
-
-signature_container_src = ${signature_container_srcdir}/Singularity
-
-$(SIGNATURE_CONTAINER): ${signature_container_src} | $(INTOGEN_CONTAINERS)
-	@echo Building bgSignature container
-	${container_builder} ${signature_container_srcdir} $@
-
-CONTAINERS_SUDO += $(SIGNATURE_CONTAINER)
\ No newline at end of file
diff --git a/build/containers/smregions/Singularity b/build/containers/smregions/Singularity
deleted file mode 100644
index c1aee28b..00000000
--- a/build/containers/smregions/Singularity
+++ /dev/null
@@ -1,18 +0,0 @@
-Bootstrap: docker
-From: python:3
-
-%environment
-    export BBGLAB_HOME=/smregions
-
-%runscript
-    exec "/usr/local/bin/smregions" "$@"
-
-%setup
-    mkdir ${SINGULARITY_ROOTFS}/smregions
-    cp smregions.conf ${SINGULARITY_ROOTFS}/smregions/
-    chmod -R a+r,a+x ${SINGULARITY_ROOTFS}/smregions/
-
-%post
-
-    # Install SMRegions
-    pip --no-cache-dir install https://bitbucket.org/bbglab/smregions/get/master.tar.gz
diff --git a/build/containers/smregions/smregions.mk b/build/containers/smregions/smregions.mk
deleted file mode 100644
index 6f286c9d..00000000
--- a/build/containers/smregions/smregions.mk
+++ /dev/null
@@ -1,14 +0,0 @@
-
-SMREGIONS_CONTAINER = $(INTOGEN_CONTAINERS)/smregions.simg
-
-smregions_container_srcdir = ${src_containers}/smregions
-
-smregions_container_src = ${smregions_container_srcdir}/smregions.conf \
-	${smregions_container_srcdir}/Singularity
-
-$(SMREGIONS_CONTAINER): $(smregions_container_src) | $(INTOGEN_CONTAINERS)
-	@echo Building SMRegions container
-	${container_builder} ${smregions_container_srcdir} $@
-
-
-CONTAINERS_SUDO += $(SMREGIONS_CONTAINER)
\ No newline at end of file
diff --git a/build/containers/transvar/Singularity b/build/containers/transvar/Singularity
deleted file mode 100644
index 9eb8ff02..00000000
--- a/build/containers/transvar/Singularity
+++ /dev/null
@@ -1,18 +0,0 @@
-Bootstrap: docker
-From: zhouwanding/transvar
-
-%environment
-    LC_ALL=C.UTF-8
-    TRANSVAR_CFG=/data/transvar.cfg
-    TRANSVAR_DOWNLOAD_DIR=/data
-    export LC_ALL TRANSVAR_CFG TRANSVAR_DOWNLOAD_DIR
-
-%runscript
-    exec "/usr/local/bin/transvar" "$@"
-
-%post
-    rm -rf /anno
-    mkdir /data
-
-%test
-    transvar --help
\ No newline at end of file
diff --git a/build/containers/transvar/transvar.mk b/build/containers/transvar/transvar.mk
deleted file mode 100644
index 80b539b8..00000000
--- a/build/containers/transvar/transvar.mk
+++ /dev/null
@@ -1,16 +0,0 @@
-
-#$(CONTAINER_TRANSVAR): | $(INTOGEN_CONTAINERS)
-#	singularity build $@ docker://zhouwanding/transvar
-
-
-TRANSVAR_CONTAINER = $(INTOGEN_CONTAINERS)/transvar.simg
-
-transvar_container_srcdir = ${src_containers}/transvar
-
-transvar_container_src = ${transvar_container_srcdir}/Singularity
-
-$(TRANSVAR_CONTAINER): $(transvar_container_src) | $(INTOGEN_CONTAINERS)
-	@echo Building TransVar container
-	${container_builder} ${transvar_container_srcdir} $@
-
-CONTAINERS_SUDO += $(TRANSVAR_CONTAINER)
\ No newline at end of file
diff --git a/build/containers/vep/releases.txt b/build/containers/vep/releases.txt
deleted file mode 100644
index 42c96f72..00000000
--- a/build/containers/vep/releases.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-111.0
-110.0
-101.0
-100.4
-99.2
-98.3
-97.4
-96.3
-95.3
-94.5
-93.7
-92.1
-91.3
-90.10
-89.9
-88.14
\ No newline at end of file
diff --git a/build/containers/vep/vep.mk b/build/containers/vep/vep.mk
deleted file mode 100644
index 26f684fb..00000000
--- a/build/containers/vep/vep.mk
+++ /dev/null
@@ -1,13 +0,0 @@
-
-# Docs on the VEP docker image: https://www.ensembl.org/info/docs/tools/vep/script/vep_download.html#docker
-
-VEP_CONTAINER = $(INTOGEN_CONTAINERS)/vep.simg
-
-vep_container_releases_file = ${src_containers}/vep/releases.txt
-vep_container_version = `grep "^${ensembl}" ${vep_container_releases_file}`
-
-$(VEP_CONTAINER): $(vep_container_releases_file) $$(ENSEMBL) | $(INTOGEN_CONTAINERS)
-	@echo Building VEP container
-	singularity build $@ docker://ensemblorg/ensembl-vep:release_${vep_container_version}
-
-CONTAINERS_USER += $(VEP_CONTAINER)
diff --git a/combination/Dockerfile b/combination/Dockerfile
new file mode 100644
index 00000000..dedeb56c
--- /dev/null
+++ b/combination/Dockerfile
@@ -0,0 +1,17 @@
+FROM python:3.9-slim
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends --no-install-suggests build-essential=12.9 && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,target=/files \
+    mkdir -p /combination && \
+    cd /files && \
+    cp -a intogen_combination /combination/ && \
+    cp LICENSE.txt MANIFEST.in README.rst requirements.txt setup.py /combination/ && \
+    pip install /combination
+
+ENTRYPOINT [ "/usr/local/bin/intogen-combine" ]
diff --git a/core/Dockerfile b/core/Dockerfile
new file mode 100644
index 00000000..ae25c3b4
--- /dev/null
+++ b/core/Dockerfile
@@ -0,0 +1,21 @@
+FROM python:3.12-slim
+
+ENV LC_ALL=C.UTF-8
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends --no-install-suggests \
+        build-essential=12.9 \
+        zlib1g-dev=1:1.2.13.dfsg-1 && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,target=/files \
+    pip install open-variant==1.0.0 && \
+    mkdir -p /core && \
+    cd /files && \
+    cp -a intogen_core /core/ && \
+    cp LICENSE.txt MANIFEST.in README.rst requirements.txt setup.py /core/ && \
+    pip install /core && \
+    cp /files/get_field.sh /usr/local/bin/ && \
+    chmod a+rx /usr/local/bin/get_field.sh
diff --git a/core/get_field.sh b/core/get_field.sh
new file mode 100755
index 00000000..2dfb0423
--- /dev/null
+++ b/core/get_field.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Print the value of a named column from the first data row of a
+# gzip-compressed, tab-separated file: get_field.sh <file.tsv.gz> <column_name>
+set -e
+
+input=$1
+field=$2
+
+# Find the 1-based index of the requested column in the header line
+C=1
+header=$( zcat "${input}" | head -n 1 )
+for i in ${header}
+do
+    if [[ "$i" == "${field}" ]]
+    then
+        break
+    else
+        C=$(( C + 1 ))
+    fi
+done
+
+# Print that column of the second line (first data row); xargs printf drops the trailing newline
+zcat "${input}" | sed -n '2p' | cut -f "${C}" | xargs printf
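+
+# Usage example (hypothetical file and column names, for illustration only):
+#   get_field.sh cohort.tsv.gz CANCER_TYPE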