Skip to content

Commit

Permalink
update the song/score module and example jsons
Browse files Browse the repository at this point in the history
  • Loading branch information
lindaxiang committed Apr 8, 2024
1 parent 948ce57 commit 4c5cb5d
Show file tree
Hide file tree
Showing 19 changed files with 721 additions and 33 deletions.
8 changes: 4 additions & 4 deletions example-params.local-qa.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@
"cleanup": true,
"lane_qc": false,
"aln_qc": false,
"song_url": "https://song.rdpc-qa.cancercollaboratory.org",
"score_url": "https://score.rdpc-qa.cancercollaboratory.org",
"song_url": "https://song.rdpc-qa.cumulus.genomeinformatics.org",
"score_url": "https://score.rdpc-qa.cumulus.genomeinformatics.org",
"download": {
"score_mem": 4,
"song_mem": 2,
"score_cpus": 2,
"score_url": "https://submission-score.rdpc-qa.cancercollaboratory.org",
"song_url": "https://submission-song.rdpc-qa.cancercollaboratory.org",
"score_url": "https://submission-score.rdpc-qa.cumulus.genomeinformatics.org",
"song_url": "https://submission-song.rdpc-qa.cumulus.genomeinformatics.org",
"song_cpus": 1
}
}
20 changes: 10 additions & 10 deletions example-params.rdpc-dev.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
"mem":16,
"cpus":4
},
"ribosomal_interval_list":"/nfs-dev-1-vol-dev-1/reference/rna-seq-references/GRCh38_Verily_v1.Picard_CollectRnaSeqMetrics/GRCh38_Verily_v1.rRNA.interval_list",
"ref_genome_gtf":"/nfs-dev-1-vol-dev-1/reference/rna-seq-references/GRCh38_Verily_v1.annotation/gencode.v40.chr_patch_hapl_scaff.annotation.gtf",
"ribosomal_interval_list":"/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.Picard_CollectRnaSeqMetrics/GRCh38_Verily_v1.rRNA.interval_list",
"ref_genome_gtf":"/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.annotation/gencode.v40.chr_patch_hapl_scaff.annotation.gtf",
"aln_qc":true,
"upload":{
"score_mem":10,
Expand All @@ -14,13 +14,14 @@
"song_cpus":2
},
"study_id":"TCRB-CA",
"song_url":"https://song.rdpc-dev.cancercollaboratory.org",
"song_url": "https://song.rdpc-dev.cumulus.genomeinformatics.org",
"score_url": "https://score.rdpc-dev.cumulus.genomeinformatics.org",
"download":{
"score_mem":10,
"song_mem":2,
"score_cpus":4,
"score_url":"https://submission-score.rdpc-dev.cancercollaboratory.org",
"song_url":"https://submission-song.rdpc-dev.cancercollaboratory.org",
"score_url": "https://submission-score.rdpc-dev.cumulus.genomeinformatics.org",
"song_url": "https://submission-song.rdpc-dev.cumulus.genomeinformatics.org",
"song_cpus":2
},
"mem":4,
Expand All @@ -33,16 +34,15 @@
"mem":18,
"cpus":4
},
"ref_genome_index_star":"/nfs-dev-1-vol-dev-1/reference/rna-seq-references/GRCh38_Verily_v1.STARindex.sjdbOverhang_75",
"ref_genome_index_star":"/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.STARindex.sjdbOverhang_75",
"lane_qc":false,
"ref_genome_fa":"/nfs-dev-1-vol-dev-1/reference/rna-seq-references/GRCh38_Verily_v1.genome/GRCh38_Verily_v1.genome.fa",
"ref_genome_fa":"/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.genome/GRCh38_Verily_v1.genome.fa",
"aligned-seq-QC":{
"mem":16,
"cpus":4
},
"analysis_metadata":"",
"cpus":1,
"score_url":"https://score.rdpc-dev.cancercollaboratory.org",
"payloadGen":{
"mem":8,
"cpus":2
Expand All @@ -53,9 +53,9 @@
"mem":80,
"cpus":12
},
"ref_flat":"/nfs-dev-1-vol-dev-1/reference/rna-seq-references/GRCh38_Verily_v1.Picard_CollectRnaSeqMetrics/GRCh38_Verily_v1.refFlat.txt.gz",
"ref_flat":"/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.Picard_CollectRnaSeqMetrics/GRCh38_Verily_v1.refFlat.txt.gz",
"cleanup":true,
"ref_genome_index_hisat2":"/nfs-dev-1-vol-dev-1/reference/rna-seq-references/GRCh38_Verily_v1.HISAT2index/GRCh38_Verily_v1",
"ref_genome_index_hisat2":"/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.HISAT2index/GRCh38_Verily_v1",
"sequencing_files":[

],
Expand Down
20 changes: 10 additions & 10 deletions example-params.rdpc-qa.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,28 @@
"study_id": "TCRB-CA",
"analysis_id": "c62cee87-04ae-4988-acee-8704aec988d4",
"analysis_metadata": "",
"ref_genome_index_star": "/nfs-dev-1-vol-qa-1/reference/rna-seq-references/GRCh38_Verily_v1.STARindex.sjdbOverhang_75",
"ref_genome_index_hisat2": "/nfs-dev-1-vol-qa-1/reference/rna-seq-references/GRCh38_Verily_v1.HISAT2index/GRCh38_Verily_v1",
"ref_genome_gtf": "/nfs-dev-1-vol-qa-1/reference/rna-seq-references/GRCh38_Verily_v1.annotation/gencode.v40.chr_patch_hapl_scaff.annotation.gtf",
"ref_genome_fa": "/nfs-dev-1-vol-qa-1/reference/rna-seq-references/GRCh38_Verily_v1.genome/GRCh38_Verily_v1.genome.fa",
"ref_genome_index_star": "/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.STARindex.sjdbOverhang_75",
"ref_genome_index_hisat2": "/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.HISAT2index/GRCh38_Verily_v1",
"ref_genome_gtf": "/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.annotation/gencode.v40.chr_patch_hapl_scaff.annotation.gtf",
"ref_genome_fa": "/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.genome/GRCh38_Verily_v1.genome.fa",
"sequencing_files": [],
"sjdboverhang": 75,
"ref_flat": "/nfs-dev-1-vol-qa-1/reference/rna-seq-references/GRCh38_Verily_v1.Picard_CollectRnaSeqMetrics/GRCh38_Verily_v1.refFlat.txt.gz",
"ribosomal_interval_list": "/nfs-dev-1-vol-qa-1/reference/rna-seq-references/GRCh38_Verily_v1.Picard_CollectRnaSeqMetrics/GRCh38_Verily_v1.rRNA.interval_list",
"ref_flat": "/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.Picard_CollectRnaSeqMetrics/GRCh38_Verily_v1.refFlat.txt.gz",
"ribosomal_interval_list": "/nfs-dev-vol-qa-3/reference/rna-seq-references/GRCh38_Verily_v1.Picard_CollectRnaSeqMetrics/GRCh38_Verily_v1.rRNA.interval_list",
"cpus": 1,
"mem": 4,
"tempdir": "/icgc-argo-scratch",
"cleanup": true,
"lane_qc": false,
"aln_qc": false,
"song_url": "https://song.rdpc-qa.cancercollaboratory.org",
"score_url": "https://score.rdpc-qa.cancercollaboratory.org",
"song_url": "https://song.rdpc-qa.cumulus.genomeinformatics.org",
"score_url": "https://score.rdpc-qa.cumulus.genomeinformatics.org",
"download": {
"score_mem": 10,
"song_mem": 2,
"score_cpus": 4,
"score_url": "https://submission-score.rdpc-qa.cancercollaboratory.org",
"song_url": "https://submission-song.rdpc-qa.cancercollaboratory.org",
"score_url": "https://submission-score.rdpc-qa.cumulus.genomeinformatics.org",
"song_url": "https://submission-song.rdpc-qa.cumulus.genomeinformatics.org",
"song_cpus": 2
},
"seqDataToLaneBam": {
Expand Down
14 changes: 7 additions & 7 deletions rna-seq-alignment-wf/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ upload_params = [
*:(params.upload ?: [:])
]

include { SongScoreDownload as dnld } from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/song-score-download@2.8.0/main.nf' params(download_params)
include { SongScoreDownload as dnld } from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/song-score-download@2.9.0/main.nf' params(download_params)
include { seqDataToLaneBam as toLaneBam } from "./modules/raw.githubusercontent.com/icgc-argo-workflows/dna-seq-processing-tools/seq-data-to-lane-bam.0.3.3.0/tools/seq-data-to-lane-bam/seq-data-to-lane-bam.nf" params(seqDataToLaneBam_params)
include { icgcArgoRnaSeqAlignmentSTAR as star } from "./wfpr_modules/github.com/icgc-argo-workflows/rna-seq-alignment/[email protected]/alignSTAR.nf" params(starAligner_params)
include { icgcArgoRnaSeqAlignmentHISAT2 as hisat2 } from "./wfpr_modules/github.com/icgc-argo-workflows/rna-seq-alignment/[email protected]/alignHISAT2.nf" params(hisat2Aligner_params)
Expand All @@ -193,12 +193,12 @@ include { payloadGenRnaAlignment as pGenAlnStarSj; payloadGenRnaAlignment as pG
include { payloadGenRnaAlignment as pGenQcStar; payloadGenRnaAlignment as pGenQcHisat2 } from './wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/[email protected]/main.nf' params(payloadGen_params)
include { payloadGenRnaAlignment as pGenSuppStar; payloadGenRnaAlignment as pGenSuppHisat2 } from './wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/[email protected]/main.nf' params(payloadGen_params)
include { payloadGenRnaAlignment as pGenQcLane } from './wfpr_modules/github.com/icgc-argo-workflows/data-processing-utility-tools/[email protected]/main.nf' params(payloadGen_params)
include { SongScoreUpload as upAlnStar; SongScoreUpload as upAlnHisat2} from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].0/main.nf' params(upload_params)
include { SongScoreUpload as upAlnTxStar } from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].0/main.nf' params(upload_params)
include { SongScoreUpload as upAlnStarSj; SongScoreUpload as upAlnHisat2Sj} from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].0/main.nf' params(upload_params)
include { SongScoreUpload as upQcStar; SongScoreUpload as upQcHisat2} from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].0/main.nf' params(upload_params)
include { SongScoreUpload as upSuppStar; SongScoreUpload as upSuppHisat2} from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].0/main.nf' params(upload_params)
include { SongScoreUpload as upQcLane} from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].0/main.nf' params(upload_params)
include { SongScoreUpload as upAlnStar; SongScoreUpload as upAlnHisat2} from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].3/main.nf' params(upload_params)
include { SongScoreUpload as upAlnTxStar } from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].3/main.nf' params(upload_params)
include { SongScoreUpload as upAlnStarSj; SongScoreUpload as upAlnHisat2Sj} from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].3/main.nf' params(upload_params)
include { SongScoreUpload as upQcStar; SongScoreUpload as upQcHisat2} from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].3/main.nf' params(upload_params)
include { SongScoreUpload as upSuppStar; SongScoreUpload as upSuppHisat2} from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].3/main.nf' params(upload_params)
include { SongScoreUpload as upQcLane} from './wfpr_modules/github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].3/main.nf' params(upload_params)


// please update workflow code as needed
Expand Down
4 changes: 2 additions & 2 deletions rna-seq-alignment-wf/pkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
"url": "https://github.com/icgc-argo-workflows/rna-seq-alignment.git"
},
"dependencies": [
"github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/song-score-download@2.8.0",
"github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].0",
"github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/song-score-download@2.9.0",
"github.com/icgc-argo-workflows/nextflow-data-processing-utility-tools/[email protected].3",
"github.com/icgc-argo-workflows/rna-seq-alignment/[email protected]",
"github.com/icgc-argo-workflows/rna-seq-alignment/[email protected]",
"github.com/icgc-argo-workflows/argo-qc-tools/[email protected]",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl=2

// Process resources.
// cpus is used both for the `cpus` directive and as score-client's TRANSPORT_PARALLEL;
// mem is interpolated into the `memory` directive as "<mem> GB".
params.cpus = 8
params.mem = 20

// Base directory for publishDir; publishing is enabled only when this is non-empty.
params.publish_dir = ""

params.max_retries = 5 // feeds maxRetries; 0 makes errorStrategy return 'finish' instead of 'retry'
params.first_retry_wait_time = 1 // in seconds; multiplied by 2^attempt to get the wait before a retry

// Required params with defaults.
// container / container_version are fallbacks: params.score_container and
// params.score_container_version take precedence when they are set.
params.container = "ghcr.io/overture-stack/score"
params.container_version = "5.8.1"
params.transport_mem = 2 // exported as TRANSPORT_MEMORY; value is a number of GBs

// Optional if the secret is mounted from the pod, otherwise required:
// when empty, the script reads the token from /tmp/rdpc_secret/secret instead.
params.api_token = "" // song/score API token for download process

// Required params, no default:
// --song_url    song url for download process (exported as METADATA_URL)
// --score_url   score url for download process (exported as STORAGE_URL)
// Downloads the files of one SONG analysis with score-client into ./out.
//
// Inputs : analysis (path), study_id (val), analysis_id (val)
// Outputs: analysis_json - the `analysis` input path, re-emitted
//          files         - everything matching out/*
//
// TODO (carried over): Replace with score container once it can download files via analysis_id
// NOTE(review): the script below already runs `score-client download --analysis-id`,
// so this TODO may be stale - confirm before removing it.
process scoreDownload {
    label "scoreDownload"
    tag "${analysis_id}"

    // resources
    cpus params.cpus
    memory "${params.mem} GB"

    // score_container / score_container_version win over the module-level defaults when set
    container "${ params.score_container ?: params.container}:${params.score_container_version ?: params.container_version}"
    publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? true : false

    // secret named "<run name>-secret", mounted at the path the script falls back to for the token
    pod = [secret: workflow.runName + "-secret", mountPath: "/tmp/rdpc_secret"]

    maxRetries params.max_retries
    errorStrategy {
        // exponential backoff: 2^attempt * first_retry_wait_time seconds, expressed in milliseconds.
        // The sleep is unconditional, so it also runs when max_retries is 0.
        def backoffMillis = (Math.pow(2, task.attempt) * params.first_retry_wait_time * 1000) as long
        sleep(backoffMillis)
        return params.max_retries ? 'retry' : 'finish'
    }

    // bind the task work dir over the client's log directory for the two handled engines
    if (workflow.containerEngine == "singularity") {
        containerOptions "--bind \$(pwd):/score-client/logs"
    } else if (workflow.containerEngine == "docker") {
        containerOptions "-v \$(pwd):/score-client/logs"
    }

    input:
    path analysis
    val study_id
    val analysis_id

    output:
    path analysis, emit: analysis_json
    path 'out/*', emit: files

    script:
    // explicit token if provided, otherwise a backtick command that reads the mounted secret at run time
    token = params.api_token ?: "`cat /tmp/rdpc_secret/secret`"
    // the command text is kept flush-left so the emitted script is unchanged
    """
export METADATA_URL=${params.song_url}
export STORAGE_URL=${params.score_url}
export TRANSPORT_PARALLEL=${params.cpus}
export TRANSPORT_MEMORY=${params.transport_mem}
export ACCESSTOKEN=${token}
score-client download --analysis-id ${analysis_id} --study-id ${study_id} --output-dir ./out
"""
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env nextflow
nextflow.enable.dsl=2

// Process resources.
// cpus feeds the `cpus` directive; mem is interpolated into `memory` as "<mem> GB".
params.cpus = 1
params.mem = 1

// Base directory for publishDir; publishing is enabled only when this is non-empty.
params.publish_dir = ""

params.max_retries = 5 // feeds maxRetries; 0 makes errorStrategy return 'finish' instead of 'retry'
params.first_retry_wait_time = 1 // in seconds; multiplied by 2^attempt to get the wait before a retry

// Required params with defaults.
// These are fallbacks: params.song_container and params.song_container_version
// take precedence when they are set.
params.container = "ghcr.io/overture-stack/song-client"
params.container_version = "5.0.2"

// Optional if the secret is mounted from the pod, otherwise required:
// when empty, the script reads the token from /tmp/rdpc_secret/secret instead.
params.api_token = "" // song/score API token for download process

// Required params, no default:
// --song_url    song url for download process (exported as CLIENT_SERVER_URL)
// --score_url   listed here originally, but not referenced by the process in this module

// Fetches the metadata of one analysis from SONG with the `sing` client and
// stores the command's stdout as <analysis_id>.analysis.json.
//
// Inputs : study_id (val), analysis_id (val)
// Outputs: json - the file matching *.analysis.json
process songGetAnalysis {
    tag "${analysis_id}"

    // resources
    cpus params.cpus
    memory "${params.mem} GB"

    // song_container / song_container_version win over the module-level defaults when set
    container "${ params.song_container ?: params.container}:${params.song_container_version ?: params.container_version}"
    publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir ? true : false

    // secret named "<run name>-secret", mounted at the path the script falls back to for the token
    pod = [secret: workflow.runName + "-secret", mountPath: "/tmp/rdpc_secret"]

    maxRetries params.max_retries
    errorStrategy {
        // exponential backoff: 2^attempt * first_retry_wait_time seconds, expressed in milliseconds.
        // The sleep is unconditional, so it also runs when max_retries is 0.
        def backoffMillis = (Math.pow(2, task.attempt) * params.first_retry_wait_time * 1000) as long
        sleep(backoffMillis)
        return params.max_retries ? 'retry' : 'finish'
    }

    // bind the task work dir over the client's log directory for the two handled engines
    if (workflow.containerEngine == "singularity") {
        containerOptions "--bind \$(pwd):/song-client/logs"
    } else if (workflow.containerEngine == "docker") {
        containerOptions "-v \$(pwd):/song-client/logs"
    }

    input:
    val study_id
    val analysis_id

    output:
    path "*.analysis.json", emit: json

    script:
    // explicit token if provided, otherwise a backtick command that reads the mounted secret at run time
    token = params.api_token ?: "`cat /tmp/rdpc_secret/secret`"
    // the command text is kept flush-left so the emitted script is unchanged
    """
export CLIENT_SERVER_URL=${params.song_url}
export CLIENT_STUDY_ID=${study_id}
export CLIENT_ACCESS_TOKEN=${token}
sing search -a ${analysis_id} > ${analysis_id}.analysis.json
"""
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env nextflow

/*
Copyright (c) 2020-2021, Ontario Institute for Cancer Research
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Authors:
Alex Lepsa
Junjun Zhang
*/

nextflow.enable.dsl = 2
version = '2.9.0'

// Universal params go here; change default values as needed.
// All of params is spread into both song_params and score_params below,
// so these values reach the two included modules unchanged.
params.publish_dir = ""  // set to empty string will disable publishDir

params.max_retries = 5  // set to 0 will disable retry
params.first_retry_wait_time = 1  // in seconds

// Tool-specific params go here; add / change as needed.
// study_id and analysis_id are what the entry workflow passes to SongScoreDownload.
params.study_id = ""
params.analysis_id = ""

// Shared API token, used for a tool whenever its own *_api_token is empty.
params.api_token = ""

// SONG client settings; song_cpus / song_mem are handed to the module as cpus / mem.
// NOTE(review): the default URL targets an rdpc-qa host on cancercollaboratory.org -
// confirm that host is still current for your deployment.
params.song_cpus = 1
params.song_mem = 1  // GB
params.song_url = "https://song.rdpc-qa.cancercollaboratory.org"
params.song_api_token = ""  // overrides api_token for the SONG step when non-empty
params.song_container = "ghcr.io/overture-stack/song-client"
params.song_container_version = "5.0.2"

// SCORE client settings; score_cpus / score_mem / score_transport_mem are handed
// to the module as cpus / mem / transport_mem.
// NOTE(review): same caveat as above for the default score URL.
params.score_cpus = 1
params.score_mem = 1  // GB
params.score_transport_mem = 1  // GB
params.score_url = "https://score.rdpc-qa.cancercollaboratory.org"
params.score_api_token = ""  // overrides api_token for the SCORE step when non-empty
params.score_container = "ghcr.io/overture-stack/score"
params.score_container_version = "5.8.1"


// Parameter map given to the songGetAnalysis include: every workflow param,
// followed by the SONG-specific values under the generic names the module reads
// (cpus, mem, api_token). The SONG token falls back to the shared api_token.
song_params = [
    *:params,
    cpus: params.song_cpus,
    mem: params.song_mem,
    song_url: params.song_url,
    song_container: params.song_container,
    song_container_version: params.song_container_version,
    api_token: params.song_api_token ? params.song_api_token : params.api_token
]

// Parameter map given to the scoreDownload include: every workflow param,
// followed by the SCORE-specific values under the generic names the module reads
// (cpus, mem, transport_mem, api_token). The SCORE token falls back to the shared api_token.
score_params = [
    *:params,
    cpus: params.score_cpus,
    mem: params.score_mem,
    transport_mem: params.score_transport_mem,
    song_url: params.song_url,
    score_url: params.score_url,
    score_container: params.score_container,
    score_container_version: params.score_container_version,
    api_token: params.score_api_token ? params.score_api_token : params.api_token
]


include { songGetAnalysis as songGet } from './local_modules/song-get-analysis' params(song_params)
include { scoreDownload as scoreDn } from './local_modules/score-download' params(score_params)


// Two-step download: songGet retrieves the analysis JSON, then scoreDn uses that
// JSON together with the same study/analysis ids to download the files.
// Emits the analysis JSON (analysis_json) and the downloaded files (files).
workflow SongScoreDownload {
    take:
    study_id
    analysis_id

    main:
    songGet(study_id, analysis_id)
    // the metadata JSON is both the downloader's input and one of this workflow's outputs
    analysis_ch = songGet.out.json
    scoreDn(analysis_ch, study_id, analysis_id)

    emit:
    analysis_json = analysis_ch
    files = scoreDn.out.files
}


// Entry point, so this script can be run directly without cloning the repo, e.g.:
//   nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx
// It forwards the study and analysis ids from params to SongScoreDownload.
workflow {
    SongScoreDownload(params.study_id, params.analysis_id)
}
Loading

0 comments on commit 4c5cb5d

Please sign in to comment.