Skip to content

Commit

Permalink
Merge pull request #166 from nf-core/sponging
Browse files Browse the repository at this point in the history
Improve circRNA-miRNA interaction analysis
  • Loading branch information
mweyrich28 authored Aug 8, 2024
2 parents 4062690 + 5082d4f commit b233bb8
Show file tree
Hide file tree
Showing 47 changed files with 2,063 additions and 350 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,12 @@ If you want to contribute, feel free to create an issue or pull request on the [
- Quantification of combined circular and linear transcriptome
- [`psirc-quant`](https://github.com/Christina-hshi/psirc)
- miRNA binding affinity analysis (only if the `mature` parameter is provided)
- [`miRanda`](http://cbio.mskcc.org/miRNA2003/miranda.html)
- [`TargetScan`](http://www.targetscan.org/cgi-bin/targetscan/data_download.vert72.cgi)
- Normalize miRNA expression (only if the `mirna_expression` parameter is provided)
- Binding site prediction
- [`miRanda`](http://cbio.mskcc.org/miRNA2003/miranda.html)
- [`TargetScan`](http://www.targetscan.org/cgi-bin/targetscan/data_download.vert72.cgi)
- Perform majority vote on binding sites
- Compute correlations between miRNA and transcript expression levels (only if the `mirna_expression` parameter is provided)
- Statistical tests (only if the `phenotype` parameter is provided)
- [`CircTest`](https://github.com/dieterich-lab/CircTest)
- MultiQC report [`MultiQC`](http://multiqc.info/)
Expand Down
72 changes: 68 additions & 4 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,7 @@ process {
}

withName: ADD_BACKSPLICE {
ext.args = "'{ if (/^>/) { print \$0 } else { start = substr(\$0, 1, 25); print \$0 start } }'"
ext.args = "-c fastx '{ if (\$name ~ /^circ_/) { \$seq = \$seq substr(\$seq, 1, 25) } print \">\" \$name; print \$seq }'"
ext.suffix = "backspliced.fa"
publishDir = [
path: { "${params.outdir}/mirna_prediction" },
Expand All @@ -804,6 +804,30 @@ process {
]
}

withName: UNIFY_MIRANDA {
ext.args = "-v FS='\\t' -v OFS='\\t' 'NR>1 { print \$1, \$2, \$7, \$8, \"miranda\" }'"
ext.suffix = "miranda.tsv"
publishDir = [
path: { "${params.outdir}/mirna_prediction/binding_sites/tools/miranda/unified" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}

withName: UNIFY_TARGETSCAN {
ext.args = "-v FS='\\t' -v OFS='\\t' 'NR>1 { print \$2, \$1, \$6, \$7, \"targetscan\" }'"
ext.suffix = "targetscan.tsv"
publishDir = [
path: { "${params.outdir}/mirna_prediction/binding_sites/tools/targetscan/unified" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}

withName: COMBINE_BINDINGSITES {
ext.prefix = "bindingsites.tsv"
}

withName: COMBINE_TRANSCRIPTOME_GTFS {
ext.args = "-k 1,1 -k4,4n -k5,5n"
ext.suffix = "combined.gtf"
Expand Down Expand Up @@ -912,6 +936,22 @@ process {
]
}

withName: '.*:MIRNA_PREDICTION:DESEQ2_NORMALIZATION' {
publishDir = [
path: { "${params.outdir}/mirna_prediction/mirna_expression" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}

withName: '.*:MIRNA_PREDICTION:MIRNA_FILTERING' {
publishDir = [
path: { "${params.outdir}/mirna_prediction/mirna_expression" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}

withName: TARGETSCAN_DATABASE {
publishDir = [
path: { "${params.outdir}/references/mirna_prediction/targetscan" },
Expand All @@ -924,7 +964,7 @@ process {
withName: TARGETSCAN {
ext.prefix = { "${meta.id}.targetscan" }
publishDir = [
path: { "${params.outdir}/mirna_prediction/targetscan" },
path: { "${params.outdir}/mirna_prediction/binding_sites/tools/targetscan/output" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
pattern: "*.txt"
Expand All @@ -935,7 +975,7 @@ process {
ext.prefix = { "${meta.id}.miranda" }
ext.args = "-strict"
publishDir = [
path: { "${params.outdir}/mirna_prediction/miranda" },
path: { "${params.outdir}/mirna_prediction/binding_sites/tools/miranda/output" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
pattern: "*.txt"
Expand All @@ -944,13 +984,37 @@ process {

withName: MIRNA_TARGETS {
publishDir = [
path: { "${params.outdir}/mirna_prediction/combined" },
path: { "${params.outdir}/mirna_prediction/binding_sites/targets" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
pattern: "*.txt"
]
}

withName: COMBINE_BINDINGSITES {
publishDir = [
path: { "${params.outdir}/mirna_prediction/binding_sites/majority_vote" },
mode: params.publish_dir_mode,
saveAs: { filename -> ( filename != 'versions.yml' && params.save_intermediates ) ? filename : null }
]
}

withName: MAJORITY_VOTE {
publishDir = [
path: { "${params.outdir}/mirna_prediction/binding_sites/majority_vote" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}

withName: '.*:MIRNA_PREDICTION:COMPUTE_CORRELATIONS' {
publishDir = [
path: { "${params.outdir}/mirna_prediction/correlation" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}

withName: CIRCTEST_PREPARE {
publishDir = [
path: { "${params.outdir}/statistical_tests/circtest" },
Expand Down
74 changes: 63 additions & 11 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ The directories listed below will be created in the results directory after the
- samples: Per sample quantification results
- transcriptome: Combined linear and circular transcriptome, based on GTF file and detected BSJs
- mirna_prediction
- miranda
- targetscan
- combined
- binding_sites
- correlation
- mirna_expression
- statistical_tests
- circtest
- multiqc
Expand Down Expand Up @@ -369,6 +369,8 @@ The quantification is performed using psirc-quant, which is a wrapper around `ka
- `*.marked.fasta`: Transcript sequences in FASTA format with the circRNA sequences marked with a `C` field in the header.
- `*.tx2gene.tsv`: Transcript to gene mapping file.

</details>

### Per sample

<details markdown="1">
Expand Down Expand Up @@ -417,13 +419,22 @@ nf-core/circrna combines the sample-specific quantification results into proper

## miRNA Prediction

### miRanda
### Binding Sites

#### Tools

This section contains predicted binding sites for miRNA-target interactions generated by various computational tools.
Each tool utilizes unique algorithms and criteria to identify potential miRNA binding sites on target genomic sequences, providing complementary insights into miRNA regulatory networks.

##### miRanda

<details markdown="1">
<summary>Output files</summary>

- `mirna_prediction/miranda/`
- `*.miRanda.txt`: Raw outputs from `miRanda`.
- `mirna_prediction/binding_sites/tools/miranda/output`
- `*.miranda.txt`: Raw predictions from `miRanda`.
- `mirna_prediction/binding_sites/tools/miranda/unified`
- `*.miranda.tsv`: Unified predictions from `miRanda`.

</details>

Expand All @@ -432,24 +443,26 @@ nf-core/circrna combines the sample-specific quantification results into proper
1. First a dynamic programming local alignment is carried out between the query miRNA sequence and the reference sequence. This alignment procedure scores based on sequence complementarity and not on sequence identity.
2. Secondly, the algorithm takes high-scoring alignments detected from phase 1 and estimates the thermodynamic stability of RNA duplexes based on these alignments. This second phase of the method utilises folding routines from the `RNAlib` library, part of the [ViennaRNA](https://www.tbi.univie.ac.at/RNA/) package.

### TargetScan
##### TargetScan

<details markdown="1">
<summary>Output files</summary>

- `mirna_prediction/targetscan/`
- `*.targetscan.txt`: Raw outputs from `TargetScan`.
- `mirna_prediction/binding_sites/tools/targetscan/output`
- `*.targetscan.txt`: Raw predictions from `TargetScan`.
- `mirna_prediction/binding_sites/tools/targetscan/unified`
- `*.targetscan.tsv`: Unified predictions from `TargetScan`.

</details>

[TargetScan](http://www.targetscan.org/vert_72/) predicts biological targets of miRNAs by searching for the presence of conserved 8mer, 7mer, and 6mer sites within the circRNA mature sequence that match the seed region of each miRNA.

### Combined
#### Targets

<details markdown="1">
<summary>Output files</summary>

- `mirna_prediction/combined/`
- `mirna_prediction/binding_sites/targets`
- `*_miRNA_targets.txt`: Filtered target miRNAs of circRNAs called by quantification tools. Columns are self-explanatory: miRNA, Score, Energy_KcalMol, Start, End, Site_type.

</details>
Expand All @@ -458,3 +471,42 @@ nf-core/circrna performs miRNA target filtering on `miRanda` and `TargetScan` pr

1. miRNA must be called by both `miRanda` and `TargetScan`.
2. If a site within the circRNA mature sequence shares duplicate miRNA IDs overlapping the same coordinates, the miRNA with the highest score is kept.

#### Majority Vote

<details markdown="1">
<summary>Output files</summary>

- `mirna_prediction/binding_sites/majority_vote`
- `mirna.targets.tsv`: Stores miRNA-target mappings with all targets listed per miRNA, making it compact and suitable for bulk analyses.
- `mirna.majority.tsv`: Lists each miRNA-target interaction on a separate line, which is helpful for detailed analysis of each interaction independently.

</details>

nf-core/circrna performs a majority vote on the predicted miRNA targets from [TargetScan](http://www.targetscan.org/vert_72/) and [miRanda](http://cbio.mskcc.org/miRNA2003/miranda.html) based on a
threshold specified by the user.

### miRNA Expression

<details markdown="1">
<summary>Output files</summary>

- `mirna_prediction/mirna_expression/`
- `mirna.normalized_counts.tsv`: Contains normalized miRNA expression of all samples.
- `mirna.normalized_counts_filtered.tsv`: Contains miRNA expression after filtering.

</details>

nf-core/circrna processes miRNA expression data by normalizing and filtering it for further analysis.

### Correlation

<details markdown="1">
<summary>Output files</summary>

- `mirna_prediction/correlation`
- `*.tsv`: Files named after the specific miRNA containing correlation results for that miRNA with its target transcripts.

</details>

nf-core/circrna computes correlations between miRNA and transcript expression levels and writes the results to individual TSV files for each miRNA-target interaction specified in the input binding sites file.
9 changes: 7 additions & 2 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,17 @@ The output of the annotation step will be bundled with the outputs of the BSJ de

## miRNA prediction

This section allows looking for miRNA binding sites in the circRNAs. The following tools are currently supported:
This section allows looking for miRNA binding sites in the circRNAs.
The following tools are currently supported:

- `miRanda`
- `TargetScan`

This section will only be executed if the `mature` parameter is provided. The parameter should point to a FASTA file containing mature miRNA sequences.
This section will only be executed if the `mature` parameter is provided.
The parameter `mature` should point to a FASTA file containing mature miRNA sequences.
By providing a TSV file containing the miRNA expression of all samples via `mirna_expression`, this
sub-workflow will perform additional normalization and filtering of `mirna_expression` and `mature` before
executing the miRNA binding site prediction.

To view the outputs of the module, please see the output [documentation](https://nf-co.re/circrna/dev/output#mirna-prediction).

Expand Down
4 changes: 3 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ workflow NFCORE_CIRCRNA {
ch_mature = params.mature ? Channel.value([[id: "mature"], file(params.mature, checkIfExists:true)]) : Channel.empty()
ch_phenotype = params.phenotype ? Channel.value([[id: "phenotype"], file(params.phenotype, checkIfExists:true)]) : Channel.empty()
ch_annotation = params.annotation ? Channel.fromSamplesheet("annotation") : Channel.empty()
ch_mirna = params.mature && params.mirna_expression ? Channel.value([[id: "mirna"], file(params.mirna_expression, checkIfExists:true)]) : Channel.empty()

CIRCRNA (
ch_samplesheet,
Expand All @@ -68,7 +69,8 @@ workflow NFCORE_CIRCRNA {
ch_gtf,
ch_mature,
ch_annotation,
ch_versions
ch_versions,
ch_mirna
)

emit:
Expand Down
22 changes: 14 additions & 8 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
"git_sha": "571a5feac4c9ce0a8df0bc15b94230e7f3e8db47",
"installed_by": ["modules"]
},
"bioawk": {
"branch": "master",
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
"installed_by": ["modules"],
"patch": "modules/nf-core/bioawk/bioawk.diff"
},
"bowtie/align": {
"branch": "master",
"git_sha": "3c77ca9aac783e76c3614a06db3bfe4fef619bde",
Expand Down Expand Up @@ -142,32 +148,32 @@
},
"samtools/flagstat": {
"branch": "master",
"git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
"git_sha": "46eca555142d6e597729fcb682adcc791796f514",
"installed_by": ["bam_stats_samtools"]
},
"samtools/idxstats": {
"branch": "master",
"git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
"git_sha": "46eca555142d6e597729fcb682adcc791796f514",
"installed_by": ["bam_stats_samtools"]
},
"samtools/index": {
"branch": "master",
"git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
"git_sha": "46eca555142d6e597729fcb682adcc791796f514",
"installed_by": ["bam_sort_stats_samtools", "modules"]
},
"samtools/sort": {
"branch": "master",
"git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
"git_sha": "46eca555142d6e597729fcb682adcc791796f514",
"installed_by": ["bam_sort_stats_samtools", "modules"]
},
"samtools/stats": {
"branch": "master",
"git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
"git_sha": "46eca555142d6e597729fcb682adcc791796f514",
"installed_by": ["bam_stats_samtools"]
},
"samtools/view": {
"branch": "master",
"git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
"git_sha": "6c2309aaec566c0d44a6cf14d4b2d0c51afe2e91",
"installed_by": ["modules"]
},
"segemehl/align": {
Expand Down Expand Up @@ -211,12 +217,12 @@
"nf-core": {
"bam_sort_stats_samtools": {
"branch": "master",
"git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
"git_sha": "46eca555142d6e597729fcb682adcc791796f514",
"installed_by": ["subworkflows"]
},
"bam_stats_samtools": {
"branch": "master",
"git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
"git_sha": "0eacd714effe5aac1c1de26593873960b3346cab",
"installed_by": ["bam_sort_stats_samtools", "subworkflows"]
},
"utils_nextflow_pipeline": {
Expand Down
7 changes: 7 additions & 0 deletions modules/local/compute_correlations/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: "compute_correlations"
channels:
- conda-forge
- defaults
- bioconda
dependencies:
- "bioconda::bioconductor-fishpond=2.8.0--r43hdfd78af_0"
Loading

0 comments on commit b233bb8

Please sign in to comment.