Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
stevekm committed Aug 2, 2022
2 parents 7498504 + 7ee15aa commit f9f78c5
Show file tree
Hide file tree
Showing 24 changed files with 4,827 additions and 3,398 deletions.
25 changes: 25 additions & 0 deletions cwl/convert_TNMafPileupPair_to_MSIInputPair.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env cwl-runner
# ExpressionTool: re-shape an array of TNMafPileupPair records into an array of
# records typed as MSIInputPair.
#
# Input:
#   pairs  - array of types.yml#TNMafPileupPair
# Output:
#   pairs  - array of types.yml#MSIInputPair; each output record carries only the
#            tumor_id, normal_id and pair_id values copied from the matching input
#            record, in the same order as the input array.
#
# NOTE(review): the MSIInputPair schema lives in types.yml and is not visible here;
# confirm that these three fields are all it requires (any other field is left unset).
# NOTE(review): the ${...} expression form needs InlineJavascriptRequirement;
# confirm that the imported types.yml supplies it, otherwise add it to requirements.
cwlVersion: v1.2
class: ExpressionTool
requirements:
  - $import: types.yml

inputs:
  pairs: "types.yml#TNMafPileupPair[]"

outputs:
  pairs: "types.yml#MSIInputPair[]"

# The array is walked with an index loop rather than for-in, so that only real
# array elements are visited and the index is a number instead of a string key.
expression: |
  ${
    var source = inputs.pairs;
    var pairs = [];
    for ( var i = 0; i < source.length; i++ ){
      pairs.push({
        "tumor_id": source[i].tumor_id,
        "normal_id": source[i].normal_id,
        "pair_id": source[i].pair_id
      });
    }
    return {"pairs": pairs};
  }
26 changes: 26 additions & 0 deletions cwl/convert_TNMafPileupPair_to_TMBInputPair.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env cwl-runner
# ExpressionTool: re-shape an array of TNMafPileupPair records into an array of
# records typed as TMBInputPair.
#
# Input:
#   pairs  - array of types.yml#TNMafPileupPair
# Output:
#   pairs  - array of types.yml#TMBInputPair; each output record carries the
#            tumor_id, normal_id, pair_id and pair_maf values copied from the
#            matching input record, in the same order as the input array.
#
# NOTE(review): the TMBInputPair schema lives in types.yml and is not visible here;
# confirm that these four fields are all it requires (any other field is left unset).
# NOTE(review): the ${...} expression form needs InlineJavascriptRequirement;
# confirm that the imported types.yml supplies it, otherwise add it to requirements.
cwlVersion: v1.2
class: ExpressionTool
requirements:
  - $import: types.yml

inputs:
  pairs: "types.yml#TNMafPileupPair[]"

outputs:
  pairs: "types.yml#TMBInputPair[]"

# The array is walked with an index loop rather than for-in, so that only real
# array elements are visited and the index is a number instead of a string key.
expression: |
  ${
    var source = inputs.pairs;
    var pairs = [];
    for ( var i = 0; i < source.length; i++ ){
      pairs.push({
        "tumor_id": source[i].tumor_id,
        "normal_id": source[i].normal_id,
        "pair_id": source[i].pair_id,
        "pair_maf": source[i].pair_maf
      });
    }
    return {"pairs": pairs};
  }
12 changes: 1 addition & 11 deletions cwl/facets-workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,7 @@ requirements:
- $import: types.yml

inputs:
pairs:
type:
type: array
items:
type: record
fields:
pair_maf: File
snp_pileup: File
pair_id: string
tumor_id: string
normal_id: string
pairs: "types.yml#TNMafPileupPair[]"

steps:
# run the facets suite wrapper set on each tumor normal pair
Expand Down
180 changes: 69 additions & 111 deletions cwl/msi.cwl
Original file line number Diff line number Diff line change
@@ -1,18 +1,7 @@
#!/usr/bin/env cwl-runner

# example command;
# msisensor msi \
# -d msi_sites
# -n normal_bam
# -t tumor_bam
# -tumor_sample_name tumor_sample_name
# -normal_sample_name normal_sample_name
# -o ${ return inputs.tumor_sample_name +"."+inputs.normal_sample_name+".msi.txt" }
#

cwlVersion: v1.0
class: CommandLineTool
baseCommand: [ "msisensor", "msi", "-b", "8" ]
baseCommand: [ "msisensor", "msi" ]

requirements:
DockerRequirement:
Expand All @@ -22,141 +11,110 @@ requirements:
ramMin: 16000
coresMin: 8

doc: |
Run msisensor on tumor-normal bams to differentiate MSI (microsatellite instable) samples from MSS (microsatellite stable) ones
doc: Run msisensor on tumor-normal bams to differentiate MSI (microsatellite instable) samples from MSS (microsatellite stable) ones

# NOTE: next time use more verbose input labels, and only make input args for options we are using in the pipeline
inputs:
d:
threads:
doc: threads number for parallel computing
type: string
default: "8"
# NOTE: no performance gains seen when using >8 threads
inputBinding:
prefix: -b

microsatellites_file:
type:
- string
- File
doc: homopolymer and microsatellites file
inputBinding:
prefix: -d

n:
normal_bam:
type:
- File
doc: normal bam file
secondaryFiles: ["^.bai"]
inputBinding:
prefix: -n

t:
tumor_bam:
type:
- File
doc: tumor bam file
secondaryFiles: ["^.bai"]
inputBinding:
prefix: -t

o:
output_filename:
type: string
doc: output distribution file
default: msi.txt
inputBinding:
prefix: -o

#below are optional inputs
# e:
# type: ['null', string]
# doc: bed file, to select a few regions
# inputBinding:
# prefix: -e
#
# f:
# type: ['null', double]
# doc: FDR threshold for somatic sites detection
# default: 0.05
# inputBinding:
# prefix: -f
#
# r:
# type: ['null', string]
# doc: choose one region, format 1:10000000-20000000
# inputBinding:
# prefix: -r
#
# l:
# type: ['null', int]
# default: 5
# doc: minimal homopolymer size
# inputBinding:
# prefix: -l
#
# p:
# type: ['null', int]
# default: 10
# doc: minimal homopolymer size for distribution analysis
# inputBinding:
# prefix: -p
#
# m:
# type: ['null', int]
# default: 50
# doc: maximal homopolymer size for distribution analysis
# inputBinding:
# prefix: -m
#
# q:
# type: ['null', int]
# default: 3
# doc: minimal microsatellites size
# inputBinding:
# prefix: -q
#
# s:
# type: ['null', int]
# default: 5
# doc: minimal number of repeats in microsatellites for distribution analysis
# inputBinding:
# prefix: -s
#
# w:
# type: ['null', int]
# default: 40
# doc: maximal microsatellites size for distribution analysis
# inputBinding:
# prefix: -w
#
# u:
# type: ['null', int]
# default: 500
# doc: span size around window for extracting reads
# inputBinding:
# prefix: -u
#
# b:
# type: ['null', int]
# default: 2
# doc: threads number for parallel computing
# inputBinding:
# prefix: -b
#
# x:
# type: ['null', int]
# default: 0
# doc: output homopolymer only, 0 is no, 1 is yes
# inputBinding:
# prefix: -x
#
# y:
# type: ['null', int]
# default: 0
# doc: output microsatellite only, 0 is no, 1 is yes
# inputBinding:
# prefix: -y

outputs:
output_file:
type: File
outputBinding:
glob: $(inputs.o)
glob: $(inputs.output_filename)
dis_file:
type: File
outputBinding:
glob: $(inputs.o)_dis
glob: $(inputs.output_filename)_dis
somatic_file:
type: File
outputBinding:
glob: $(inputs.o)_somatic
glob: $(inputs.output_filename)_somatic


# example command:
# msisensor msi \
#   -b threads \
#   -d microsatellites_file \
#   -n normal_bam \
#   -t tumor_bam \
#   -o output_filename
#
# NOTE: msisensor msi has no -tumor_sample_name / -normal_sample_name options (see the usage text below);
# to get per-pair output names, pass a value such as "<tumor_sample_name>.<normal_sample_name>.msi.txt" as output_filename
#

#
# Program: msisensor (homopolymer and miscrosatelite analysis using bam files)
# Version: v0.2
# Author: Beifang Niu && Kai Ye
#
# Usage: msisensor <command> [options]
#
# Key commands:
#
# scan scan homopolymers and miscrosatelites
# msi msi scoring
#
#
#
# Singularity> msisensor msi
#
# Usage: msisensor msi [options]
#
# -d <string> homopolymer and microsates file
# -n <string> normal bam file
# -t <string> tumor bam file
# -o <string> output distribution file
#
# -e <string> bed file, optional
# -f <double> FDR threshold for somatic sites detection, default=0.05
# -c <int> coverage threshold for msi analysis, WXS: 20; WGS: 15, default=20
# -r <string> choose one region, format: 1:10000000-20000000
# -l <int> mininal homopolymer size, default=5
# -p <int> mininal homopolymer size for distribution analysis, default=10
# -m <int> maximal homopolymer size for distribution analysis, default=50
# -q <int> mininal microsates size, default=3
# -s <int> mininal microsates size for distribution analysis, default=5
# -w <int> maximal microstaes size for distribution analysis, default=40
# -u <int> span size around window for extracting reads, default=500
# -b <int> threads number for parallel computing, default=1
# -x <int> output homopolymer only, 0: no; 1: yes, default=0
# -y <int> output microsatellite only, 0: no; 1: yes, default=0
#
# -h help
Loading

0 comments on commit f9f78c5

Please sign in to comment.