Skip to content

Commit

Permalink
Merge pull request #102 from CCBR/iss-100
Browse files Browse the repository at this point in the history
Set default exome targets file based on genome
  • Loading branch information
samarth8392 authored Aug 12, 2024
2 parents cbf9842 + 86b7bfc commit 0c1a3ce
Show file tree
Hide file tree
Showing 15 changed files with 106 additions and 21 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@ jobs:
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tn_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--output /opt2/output_tn_fqs --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v7.32.4 \
/opt2/bin/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tn_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--output /opt2/output_tn_fqs --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode dryrun
- name: Tumor-only FastQ Dry Run
Expand All @@ -44,15 +44,15 @@ jobs:
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tonly_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--output /opt2/output_tonly_fqs --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v7.32.4 \
/opt2/bin/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.R1.fastq.gz /opt2/.tests/Sample10_ARK1_S37.R2.fastq.gz \
/opt2/.tests/Sample11_ACI_158_S38.R1.fastq.gz /opt2/.tests/Sample11_ACI_158_S38.R2.fastq.gz \
/opt2/.tests/Sample4_CRL1622_S31.R1.fastq.gz /opt2/.tests/Sample4_CRL1622_S31.R2.fastq.gz \
--output /opt2/output_tonly_fqs --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--output /opt2/output_tonly_fqs --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode dryrun
- name: Tumor-normal BAM Dry Run
Expand All @@ -62,15 +62,15 @@ jobs:
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tn_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--output /opt2/output_tn_bams --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v7.32.4 \
/opt2/bin/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tn_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--output /opt2/output_tn_bams --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \
--pairs /opt2/.tests/pairs.tsv --genome hg38 --mode local --ffpe --cnv --runmode dryrun
- name: Tumor-only BAM Dry Run
Expand All @@ -80,15 +80,15 @@ jobs:
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tonly_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--output /opt2/output_tonly_bams --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode init
docker run -v $PWD:/opt2 snakemake/snakemake:v7.32.4 \
/opt2/bin/xavier run --input \
/opt2/.tests/Sample10_ARK1_S37.recal.bam \
/opt2/.tests/Sample11_ACI_158_S38.recal.bam \
/opt2/.tests/Sample4_CRL1622_S31.recal.bam \
--output /opt2/output_tonly_bams --targets /opt2/.tests/Agilent_SSv7_allExons_hg38.bed \
--output /opt2/output_tonly_bams --targets /opt2/resources/Agilent_SSv7_allExons_hg38.bed \
--genome hg38 --mode local --ffpe --runmode dryrun
- name: Lint
Expand Down
Empty file.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
- The docs website now has a dropdown menu to select which version to view. The latest release is shown by default. (#150, @kelly-sovacool)
- Add `xavier gui` subcommand to launch the graphical user interface. (#99, @kelly-sovacool)
- Previously, `xavier_gui` (with an underscore) was a command in the `ccbrpipeliner` module.
- Provide default exome targets for hg38 and mm10, which can be overridden by the optional `--targets` argument. (#102, @kelly-sovacool)
- Previously, the `--targets` argument was required with no defaults.

## XAVIER 3.0.3

Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ xavier run \
--output /data/$USER/xavier_hg38 \
--genome hg38 \
--pairs pairs.txt \
--targets Targets_hg38.bed \
--targets resources/Agilent_SSv7_allExons_hg38.bed \
--mode slurm \
--runmode init

Expand All @@ -71,7 +71,7 @@ xavier run \
--output /data/$USER/xavier_hg38 \
--genome hg38 \
--pairs pairs.txt \
--targets Targets_hg38.bed \
--targets resources/Agilent_SSv7_allExons_hg38.bed \
--mode slurm \
--runmode dryrun

Expand All @@ -81,7 +81,7 @@ xavier run \
--output /data/$USER/xavier_hg38 \
--genome hg38 \
--pairs pairs.txt \
--targets Targets_hg38.bed \
--targets resources/Agilent_SSv7_allExons_hg38.bed \
--mode slurm \
--runmode run
```
Expand Down Expand Up @@ -109,7 +109,7 @@ xavier run \
--sif-cache $SIFCACHE \
--tmp-dir $TMPDIR \
--pairs pairs.txt \
--targets Targets_hg38.bed \
--targets resources/Agilent_SSv7_allExons_hg38.bed \
--mode slurm \
--runmode init # run

Expand Down
1 change: 1 addition & 0 deletions config/genomes/biowulf/hg38.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"references": {
"FASTQ_SCREEN_CONFIG": "resources/fastq_screen.biowulf.conf",
"exome_targets": "resources/Agilent_SSv7_allExons_hg38.bed",
"KRAKENBACDB": "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2",
"trimmomatic.adapters": "resources/adapters.fa",
"SNPEFF_GENOME": "GRCh38.86",
Expand Down
1 change: 1 addition & 0 deletions config/genomes/biowulf/mm10.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"references": {
"FASTQ_SCREEN_CONFIG": "resources/fastq_screen.biowulf.conf",
"exome_targets": "resources/SureSelect_mm10_sorted.bed",
"KRAKENBACDB": "/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2",
"trimmomatic.adapters": "resources/adapters.fa",
"SNPEFF_GENOME": "GRCm38.86",
Expand Down
1 change: 1 addition & 0 deletions config/genomes/frce/hg38.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"references": {
"FASTQ_SCREEN_CONFIG": "resources/fastq_screen.frce.conf",
"exome_targets": "resources/Agilent_SSv7_allExons_hg38.bed",
"KRAKENBACDB": "/mnt/projects/CCBR-Pipelines/pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2",
"trimmomatic.adapters": "resources/adapters.fa",
"SNPEFF_GENOME": "GRCh38.86",
Expand Down
1 change: 1 addition & 0 deletions config/genomes/frce/mm10.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"references": {
"FASTQ_SCREEN_CONFIG": "resources/fastq_screen.frce.conf",
"exome_targets": "resources/SureSelect_mm10_sorted.bed",
"KRAKENBACDB": "/mnt/projects/CCBR-Pipelines/pipelines/XAVIER/resources/hg38/kraken/20180907_standard_kraken2",
"trimmomatic.adapters": "resources/adapters.fa",
"SNPEFF_GENOME": "GRCm38.86",
Expand Down
6 changes: 4 additions & 2 deletions docs/usage/run.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ Each of the following arguments are required. Failure to provide a required argu
> This option defines the reference genome for your set of samples. On Biowulf, xavier comes bundled with pre-built reference files for human and mouse samples; however, it is worth noting that the pipeline also accepts a pre-built resource bundle pulled with the cache sub command (coming soon). Currently, the pipeline supports the human reference hg38 and the mouse reference mm10.
>
> **_Pre built Option_**
> Here is a list of available pre built genomes on Biowulf: hg38.
> Here is a list of available pre built genomes on Biowulf: hg38, mm10.
>
> **_Custom Option_**
> For users running the pipeline outside of Biowulf, a pre-built resource bundle can be pulled with the cache sub command (coming soon). Please supply the custom reference JSON file that was generated by the cache sub command.
Expand All @@ -98,7 +98,9 @@ Each of the following arguments are required. Failure to provide a required argu
>
> This file can be obtained from the manufacturer of the target capture kit that was used.
>
> **_Example:_** `--targets /data/$USER/Agilent_SSv7_allExons_hg38.bed`
> If not provided, the default targets file from the genome config is used.
>
> **_Example:_** `--targets resources/Agilent_SSv7_allExons_hg38.bed`
>
> **_Example:_** `--targets resources/SureSelect_mm10_sorted.bed`
### 2.2 Options

Expand Down
Empty file modified resources/Agilent_SSv7_allExons_hg38.bed
100755 → 100644
Empty file.
Empty file modified resources/fastq_screen.frce.conf
100755 → 100644
Empty file.
12 changes: 8 additions & 4 deletions src/xavier/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ def parsed_arguments():
Path to exome targets BED file. This file can be
obtained from the manufacturer of the target capture
kit that was used.
If not provided, the default targets file is used from the genome config file.
Example: --targets resources/Agilent_SSv7_allExons_hg38.bed
Example: --targets resources/SureSelect_mm10_sorted.bed
"""
)
Expand All @@ -264,15 +267,15 @@ def parsed_arguments():
--input .tests/*.R?.fastq.gz \\
--output /data/$USER/xavier_hg38 \\
--genome hg38 \\
--targets .tests/Agilent_SSv7_allExons_hg38.bed
--targets resources/Agilent_SSv7_allExons_hg38.bed
# Step 2B.) Dry-run the pipeline
xavier run \\
--runmode dryrun \\
--input .tests/*.R?.fastq.gz \\
--output /data/$USER/xavier_hg38 \\
--genome hg38 \\
--targets Agilent_SSv7_allExons_hg38.bed \\
--targets resources/Agilent_SSv7_allExons_hg38.bed \\
--mode slurm \\
# Step 2C.) Run the XAVIER pipeline
Expand All @@ -283,7 +286,7 @@ def parsed_arguments():
--input .tests/*.R?.fastq.gz \\
--output /data/$USER/xavier_hg38 \\
--genome hg38 \\
--targets .tests/Agilent_SSv7_allExons_hg38.bed \\
--targets resources/Agilent_SSv7_allExons_hg38.bed \\
--mode slurm
version:
Expand Down Expand Up @@ -354,8 +357,9 @@ def parsed_arguments():
"--targets",
# Check if the file exists and if it is readable
type=lambda file: permissions(parser, file, os.R_OK),
required=True,
required=False,
help=argparse.SUPPRESS,
default=None,
)

# Optional Arguments
Expand Down
14 changes: 12 additions & 2 deletions src/xavier/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def setup(sub_args, repo_path, output_path, create_nidap_folder_YN="no", links=[
f"{shorthostname} unknown host. Configuration files for references may not be correct. Defaulting to Biowulf config"
)
else:
print(f"Thank you for running XAVIER on {shorthostname.upper()}")
print(f"Thank you for running XAVIER on {shorthostname.upper()}")

genome_config = os.path.join(
repo_path, "config", "genomes", get_hpcname(), sub_args.genome + ".json"
Expand Down Expand Up @@ -370,7 +370,17 @@ def setup(sub_args, repo_path, output_path, create_nidap_folder_YN="no", links=[
# Add optional cli workflow steps
config["input_params"]["CNV_CALLING"] = str(sub_args.cnv).lower()
config["input_params"]["FFPE_FILTER"] = str(sub_args.ffpe).lower()
config["input_params"]["EXOME_TARGETS"] = str(sub_args.targets)
config["input_params"]["EXOME_TARGETS"] = (
str(sub_args.targets)
if sub_args.targets
else os.path.join(
config["project"]["workpath"], config["references"]["exome_targets"]
)
)
if not os.path.exists(config["input_params"]["EXOME_TARGETS"]):
raise FileNotFoundError(
f"Exome targets file does not exist: {config['input_params']['EXOME_TARGETS']}"
)
config["input_params"]["VARIANT_CALLERS"] = sub_args.callers
config["input_params"]["PAIRS_FILE"] = str(sub_args.pairs)
config["input_params"]["BASE_OUTDIR"] = str(sub_args.output)
Expand Down
63 changes: 63 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,34 @@
import json
import os
import subprocess
import tempfile
from xavier.src.xavier.__main__ import main
from xavier.src.xavier.util import get_hpcname

# Base `xavier run` command line shared by the CLI tests in this module.
# Tests append `--genome` (and optionally `--targets`) to it, and
# `run_in_temp` appends `--output` and `--runmode`. The trailing space on
# each fragment keeps the concatenated options separated.
xavier_run = (
    "xavier run "
    "--input .tests/*.fastq.gz "
    "--pairs .tests/pairs.tsv "
    "--mode local "
)


def run_in_temp(command_str):
    """Run a command in init and dryrun mode against a throwaway output dir.

    ``command_str`` is extended with ``--output <tmp>/testout`` and executed
    twice through the shell, chained with ``&&``: first with
    ``--runmode init``, then with ``--runmode dryrun``. The shell is used so
    that glob patterns inside ``command_str`` are expanded.

    Returns:
        A ``(completed_process, config)`` tuple. ``completed_process`` is the
        ``subprocess.CompletedProcess`` with captured text stdout/stderr.
        ``config`` is the parsed content of ``config.json`` from the output
        directory, or ``None`` when that file was not created.
    """
    with tempfile.TemporaryDirectory() as scratch:
        out_path = os.path.join(scratch, "testout")
        base_cmd = f"{command_str} --output {out_path}"
        init_cmd = f"{base_cmd} --runmode init"
        dryrun_cmd = f"{base_cmd} --runmode dryrun"
        completed = subprocess.run(
            f"{init_cmd} && {dryrun_cmd}",
            shell=True,
            text=True,
            capture_output=True,
        )

        # The config must be read before the temporary directory is removed.
        config_path = os.path.join(out_path, "config.json")
        parsed = None
        if os.path.exists(config_path):
            with open(config_path, "r") as handle:
                parsed = json.load(handle)
    return completed, parsed


def test_help():
Expand All @@ -9,3 +38,37 @@ def test_help():
"./bin/xavier --help", capture_output=True, shell=True, text=True
).stdout
)


def test_dryrun_targets():
    """Check how the exome targets file is resolved by init + dry-run.

    Four scenarios are exercised through ``run_in_temp``:

    * hg38 without ``--targets``: the hg38 default targets file is used.
    * mm10 without ``--targets``: the mm10 default targets file is used.
    * mm10 with an explicit ``--targets``: the given file wins.
    * hg38 with a non-existent ``--targets`` path: the CLI reports an error
      and no ``config.json`` is written.

    The checks only run when ``get_hpcname()`` reports ``"biowulf"``; on any
    other host the function returns without asserting anything.
    """
    if get_hpcname() != "biowulf":
        return

    dryrun_banner = (
        "This was a dry-run (flag -n). "
        "The order of jobs does not reflect the order of execution."
    )
    human_targets = "resources/Agilent_SSv7_allExons_hg38.bed"
    mouse_targets = "resources/SureSelect_mm10_sorted.bed"

    output_human, config_human = run_in_temp(f"{xavier_run} --genome hg38")
    output_mouse, config_mouse = run_in_temp(f"{xavier_run} --genome mm10")
    output_custom, config_custom = run_in_temp(
        f"{xavier_run} --genome mm10 --targets {human_targets}"
    )
    # Spell the flag out in full rather than relying on argparse's
    # prefix-abbreviation matching of `--target` to `--targets`.
    output_invalid, config_invalid = run_in_temp(
        f"{xavier_run} --genome hg38 --targets not/a/file.txt"
    )

    # One assert per expectation, so a failure names the scenario that broke
    # and a missing config never masks an earlier, more informative failure.
    assert dryrun_banner in output_human.stdout
    assert config_human["input_params"]["EXOME_TARGETS"].endswith(human_targets)

    assert dryrun_banner in output_mouse.stdout
    assert config_mouse["input_params"]["EXOME_TARGETS"].endswith(mouse_targets)

    assert dryrun_banner in output_custom.stdout
    assert config_custom["input_params"]["EXOME_TARGETS"].endswith(human_targets)

    assert (
        "error: Path 'not/a/file.txt' does not exists! Failed to provide valid input."
        in output_invalid.stderr
    )
    assert not config_invalid
2 changes: 1 addition & 1 deletion tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_dryrun():
input=list(glob.glob(xavier_base(".tests/*.fastq.gz"))),
output=tmp_dir,
genome="hg38",
targets=xavier_base(".tests/Agilent_SSv7_allExons_hg38.bed"),
targets=xavier_base("resources/Agilent_SSv7_allExons_hg38.bed"),
mode="local",
job_name="pl:xavier",
callers=["mutect2", "mutect", "strelka", "vardict", "varscan"],
Expand Down

0 comments on commit 0c1a3ce

Please sign in to comment.