Skip to content

Commit

Permalink
chore: Merge branch 'main' into iss-72
Browse files Browse the repository at this point in the history
  • Loading branch information
kelly-sovacool committed Feb 7, 2024
2 parents 227befc + 37c3850 commit d0f8621
Show file tree
Hide file tree
Showing 81 changed files with 3,401 additions and 2,811 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
branches:
- main
paths:
- 'docs/**'
- "docs/**"

jobs:
deploy:
Expand All @@ -14,7 +14,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: 3.9
# quoted so YAML keeps the version as a string (an unquoted 3.10 would load as the float 3.1)
python-version: "3.11"
- run: pip install --upgrade pip
- run: pip install -r docs/requirements.txt
- run: mkdocs gh-deploy --force
166 changes: 83 additions & 83 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,86 +13,86 @@ jobs:
Dry_Run_and_Lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: docker://snakemake/snakemake:v6.8.2
- name: check CLI basics
run: |
./xavier --help
./xavier --version
- name: Tumor-normal FastQ Dry Run
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tn_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tn_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode dryrun
- name: Tumor-only FastQ Dry Run
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tonly_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tonly_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode dryrun
- name: Tumor-normal BAM Dry Run
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tn_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tn_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode dryrun
- name: Tumor-only BAM Dry Run
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tonly_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tonly_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode dryrun
- name: Lint Workflow
continue-on-error: true
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:v5.24.2 snakemake --lint -s /opt2/output/workflow/Snakefile -d /opt2/output_tn_fqs || \
echo 'There may have been a few warnings or errors. Please read through the log to determine if its harmless.'
- uses: actions/checkout@v2
- uses: docker://snakemake/snakemake:v6.8.2
- name: check CLI basics
run: |
./xavier --help
./xavier --version
- name: Tumor-normal FastQ Dry Run
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tn_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tn_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode dryrun
- name: Tumor-only FastQ Dry Run
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tonly_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tonly_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode dryrun
- name: Tumor-normal BAM Dry Run
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tn_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tn_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode dryrun
- name: Tumor-only BAM Dry Run
run: |
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tonly_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v6.8.2 \
/opt2/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tonly_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode dryrun
- name: Lint Workflow
  # linting is advisory only: a failure here never fails the job
  continue-on-error: true
  # No step in this job creates /opt2/output; the tumor-normal FastQ init run
  # writes to /opt2/output_tn_fqs, which is also the working directory passed
  # via -d. The Snakefile is presumably at <output>/workflow/Snakefile, as the
  # previous path implied -- TODO confirm against what `xavier run --runmode init` copies.
  run: |
    docker run -v $PWD:/opt2 snakemake/snakemake:v5.24.2 snakemake --lint -s /opt2/output_tn_fqs/workflow/Snakefile -d /opt2/output_tn_fqs || \
    echo 'There may have been a few warnings or errors. Please read through the log to determine if its harmless.'
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,4 +166,4 @@ test_*/
test.sh

# bash history files
**/.koparde*
**/.koparde*
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ repos:
hooks:
- id: codespell
# https://github.com/codespell-project/codespell/issues/1498
exclude: >
(?x)^(
.*\.svg
)$
# https://github.com/codespell-project/codespell/issues/1498
# Python formatting
- repo: https://github.com/psf/black
rev: 23.7.0
Expand Down
3 changes: 1 addition & 2 deletions .tests/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# About

These input files are used for continuous integration purposes, specificially to dry run the pipeline whenever commits have been made to the main, master, or unified branches.
These input files are used for continuous integration purposes, specifically to dry run the pipeline whenever commits have been made to the main, master, or unified branches.

**Please Note:** Each of the provided FastQ and BAM files is empty and is not suitable input to the CCBR GATK4 pipeline!

1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

- Create `CITATION.cff` to describe how to cite XAVIER. (#68, @kelly-sovacool)
- Provide a more helpful error message when `xavier` is called with no arguments. (#75, @kelly-sovacool)
- Minor documentation improvements. (#78, @kelly-sovacool)

## v3.0.2

Expand Down
35 changes: 20 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
# XAVIER - e**X**ome **A**nalysis and **V**ariant explor**ER** 🔬 [![tests](https://github.com/CCBR/XAVIER/workflows/tests/badge.svg)](https://github.com/CCBR/XAVIER/actions/workflows/main.yaml) [![docs](https://github.com/CCBR/XAVIER/workflows/docs/badge.svg)](https://github.com/CCBR/XAVIER/actions/workflows/docs.yml) [![Docker Pulls](https://img.shields.io/docker/pulls/nciccbr/ccbr_wes_base)](https://hub.docker.com/r/nciccbr/ccbr_wes_base) [![GitHub issues](https://img.shields.io/github/issues/CCBR/XAVIER?color=brightgreen)](https://github.com/CCBR/XAVIER/issues) [![GitHub license](https://img.shields.io/github/license/CCBR/XAVIER)](https://github.com/CCBR/XAVIER/blob/main/LICENSE)
# XAVIER - e**X**ome **A**nalysis and **V**ariant explor**ER** 🔬 [![tests](https://github.com/CCBR/XAVIER/workflows/tests/badge.svg)](https://github.com/CCBR/XAVIER/actions/workflows/main.yaml) [![docs](https://github.com/CCBR/XAVIER/workflows/docs/badge.svg)](https://github.com/CCBR/XAVIER/actions/workflows/docs.yml) [![Docker Pulls](https://img.shields.io/docker/pulls/nciccbr/ccbr_wes_base)](https://hub.docker.com/r/nciccbr/ccbr_wes_base) [![GitHub issues](https://img.shields.io/github/issues/CCBR/XAVIER?color=brightgreen)](https://github.com/CCBR/XAVIER/issues) [![GitHub license](https://img.shields.io/github/license/CCBR/XAVIER)](https://github.com/CCBR/XAVIER/blob/main/LICENSE)

> ***_XAVIER - eXome Analysis and Variant explorER_***. This is the home of the pipeline, XAVIER. Its long-term goals: to accurately call germline and somatic variants, to infer CNVs, and to boldly annotate variants like no pipeline before!
> **_*XAVIER - eXome Analysis and Variant explorER*_**. This is the home of the pipeline, XAVIER. Its long-term goals: to accurately call germline and somatic variants, to infer CNVs, and to boldly annotate variants like no pipeline before!
## Overview

Welcome to XAVIER! Before getting started, we highly recommend reading through [xavier's documentation](https://CCBR.github.io/XAVIER).

The **`xavier`** pipeline is composed several inter-related sub commands to setup and run the pipeline across different systems. Each of the available sub commands perform different functions:
The **`xavier`** pipeline is composed of several inter-related subcommands to set up and run the pipeline across different systems. Each of the available subcommands performs a different function:

* [<code>xavier <b>run</b></code>](https://CCBR.github.io/XAVIER/usage/run/): Run the XAVIER pipeline with your input files.
* [<code>xavier <b>unlock</b></code>](https://CCBR.github.io/XAVIER/usage/unlock/): Unlocks a previous runs output directory.
* [<code>xavier <b>cache</b></code>](https://CCBR.github.io/XAVIER/usage/cache/): Cache remote resources locally, coming soon!
- [<code>xavier <b>run</b></code>](https://CCBR.github.io/XAVIER/usage/run/): Run the XAVIER pipeline with your input files.
- [<code>xavier <b>unlock</b></code>](https://CCBR.github.io/XAVIER/usage/unlock/): Unlock a previous run's output directory.
- [<code>xavier <b>cache</b></code>](https://CCBR.github.io/XAVIER/usage/cache/): Cache remote resources locally, coming soon!

XAVIER is a comprehensive whole exome-sequencing pipeline following the Broad's set of best practices. It relies on technologies like [Singularity<sup>1</sup>](https://singularity.lbl.gov/) to maintain the highest-level of reproducibility. The pipeline consists of a series of data processing and quality-control steps orchestrated by [Snakemake<sup>2</sup>](https://snakemake.readthedocs.io/en/stable/), a flexible and scalable workflow management system, to submit jobs to a cluster or cloud provider.

Expand All @@ -20,12 +21,15 @@ Before getting started, we highly recommend reading through the [usage](https://
For more information about issues or trouble-shooting a problem, please check out our [FAQ](faq/questions.md) prior to [opening an issue on GitHub](https://github.com/CCBR/XAVIER/issues).

## Dependencies
**Requires:** `singularity>=3.5` `snakemake==6.X`

**Requires:** `singularity>=3.5` `snakemake==6.X`

[Snakemake](https://snakemake.readthedocs.io/en/stable/getting_started/installation.html) and [singularity](https://singularity.lbl.gov/all-releases) must be installed on the target system. Snakemake orchestrates the execution of each step in the pipeline. To guarantee the highest level of reproducibility, each step relies on versioned images from [DockerHub](https://hub.docker.com/orgs/nciccbr/repositories). Snakemake uses singularity to pull these images onto the local filesystem prior to job execution, and as such, snakemake and singularity are the only two dependencies.

## Run XAVIER pipeline

### Biowulf

```bash
# XAVIER is configured to use different execution backends: local or slurm
# view the help page for more information
Expand All @@ -52,7 +56,7 @@ xavier run \
--pairs pairs.txt \
--targets Targets_hg38.bed \
--mode slurm \
--runmode init
--runmode init

# Second, do a dry run to visualize outputs
xavier run \
Expand All @@ -62,7 +66,7 @@ xavier run \
--pairs pairs.txt \
--targets Targets_hg38.bed \
--mode slurm \
--runmode dryrun
--runmode dryrun

# Then do a complete run
xavier run \
Expand All @@ -72,10 +76,11 @@ xavier run \
--pairs pairs.txt \
--targets Targets_hg38.bed \
--mode slurm \
--runmode run
--runmode run
```

### FRCE

```bash
# grab an interactive node
srun --export all --pty --x11 bash
Expand Down Expand Up @@ -103,11 +108,11 @@ xavier run \

```

## Contribute

This site is a living document, created for and by members like you. XAVIER is maintained by the members of CCBR and is improved by continous feedback! We encourage you to contribute new content and make improvements to existing content via pull request to our [repository](https://github.com/CCBR/XAVIER/pulls).
## Contribute

This site is a living document, created for and by members like you. XAVIER is maintained by the members of CCBR and is improved by continuous feedback! We encourage you to contribute new content and make improvements to existing content via pull request to our [repository](https://github.com/CCBR/XAVIER/pulls).

## References
<sup>**1.** Kurtzer GM, Sochat V, Bauer MW (2017). Singularity: Scientific containers for mobility of compute. PLoS ONE 12(5): e0177459.</sup>
<sup>**2.** Koster, J. and S. Rahmann (2018). "Snakemake-a scalable bioinformatics workflow engine." Bioinformatics 34(20): 3600.</sup>

<sup>**1.** Kurtzer GM, Sochat V, Bauer MW (2017). Singularity: Scientific containers for mobility of compute. PLoS ONE 12(5): e0177459.</sup>
<sup>**2.** Koster, J. and S. Rahmann (2018). "Snakemake-a scalable bioinformatics workflow engine." Bioinformatics 34(20): 3600.</sup>
34 changes: 24 additions & 10 deletions bin/redirect
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
# - loads required modules
# - passes the arguments directly to the python script


# Load an environment module only when its tool is not already on PATH.
#
# Arguments:
#   $1 - name used both as the executable to look up on PATH and as the
#        module to load when that lookup finds nothing.
function load_module_if_needed {
    # keep helper variables local so they do not leak into the calling script
    local module_name=$1
    local tool_path
    # `type -P` prints the path of a matching executable on PATH, or nothing
    tool_path=$(type -P "$module_name")
    if [[ -z $tool_path ]]; then
        module load "$module_name"
    fi
}

SCRIPTNAME="$BASH_SOURCE"
SCRIPTDIRNAME=$(readlink -f $(dirname "$SCRIPTNAME"))

Expand All @@ -15,15 +24,15 @@ fi

TOOLDIR=$(dirname "$SCRIPTDIRNAME")
TOOLNAME=$(basename "$SCRIPTNAME")
echo $TOOLNAME
# echo $TOOLNAME

# find out if you are running on biowulf or frce and load conda
# find out if you are running on biowulf or frce
# grep -c prints how many lines of the Slurm config mention each cluster name;
# compare those counts numerically (-gt) rather than with the string operator >
nbiowulf=$(scontrol show config | grep -i -c biowulf)
if [[ "$nbiowulf" -gt 0 ]];then ISBIOWULF=true; else ISBIOWULF=false;fi
nfrce=$(scontrol show config | grep -i -c fsitgl)
if [[ "$nfrce" -gt 0 ]];then ISFRCE=true; else ISFRCE=false;fi


# load conda
if [[ $ISBIOWULF == true ]];then
. "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh"
conda activate py311
Expand All @@ -37,20 +46,25 @@ else
echo " - singularity is in PATH"
echo " - snakemake is in PATH"
fi
# if not on biowulf or frce then
# use py311.environment.yml in resources folder to create the py311 conda environment
# and load py311

# load required modules
# if running somewhere other than biowulf or frce, then ensure that
# - singularity
# - snakemake
# are in PATH
if [[ $ISBIOWULF == true ]];then
module purge
module load singularity
module load snakemake
# module purge
load_module_if_needed singularity
load_module_if_needed snakemake
elif [[ $ISFRCE == true ]];then
# snakemake module on FRCE does not work as expected
# use the conda installed version of snakemake instead
module purge
module load singularity
# module purge
# load_module_if_needed takes only the module name as its argument
load_module_if_needed singularity
export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH"
fi


${TOOLDIR}/${TOOLNAME} "$@" || true

Loading

0 comments on commit d0f8621

Please sign in to comment.