forked from geraldinepascal/FROGS-wrappers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
remove_chimera.xml
174 lines (129 loc) · 5.94 KB
/
remove_chimera.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
<?xml version="1.0"?>
<!--
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-->
<tool id="FROGS_remove_chimera" name="FROGS Remove chimera" version="3.1">
<!-- Short tagline displayed next to the tool name in the Galaxy tool panel. -->
<description>Remove PCR chimera in each sample.</description>

<!-- Software dependency resolved by Galaxy: the "frogs" package, version 3.1.0. -->
<requirements>
    <requirement type="package" version="3.1.0">frogs</requirement>
</requirements>

<!-- Job failure detection: every non-zero exit code (positive or negative) is fatal. -->
<stdio>
    <exit_code range="1:" level="fatal"/>
    <exit_code range=":-1" level="fatal"/>
</stdio>
<!--
    Cheetah command template for remove_chimera.py.

    * The template is wrapped in CDATA so that shell/Cheetah characters never need XML escaping.
    * Every dataset path is single-quoted so paths with spaces or shell metacharacters are safe.
    * Parameters declared inside the "abundance_type" conditional are referenced with their
      fully-qualified name instead of relying on Galaxy's legacy unqualified lookup.
    * \${GALAXY_SLOTS:-1} is escaped so it is expanded by the shell at run time (1 CPU by default),
      not by Cheetah.
    * The abundance input/output pair depends on the selected abundance type: BIOM or TSV count file.
-->
<command><![CDATA[
    remove_chimera.py
        --nb-cpus \${GALAXY_SLOTS:-1}
        --input-fasta '$sequence_file'
        --non-chimera '$non_chimera_fasta'
        --summary '$summary_file'
        #if $abundance_type.abundance_type_selected == "biom"
            --input-biom '$abundance_type.abundance_biom'
            --out-abundance '$out_abundance_biom'
        #else
            --input-count '$abundance_type.abundance_count'
            --out-abundance '$out_abundance_count'
        #end if
]]></command>
<inputs>
    <!-- FASTA dataset holding the sequences passed to remove_chimera.py as input. -->
    <param name="sequence_file"
           type="data"
           format="fasta"
           optional="false"
           label="Sequences file"
           help="The sequences file (format: fasta)." />

    <!-- The abundance table is supplied either as a BIOM dataset or as a tabular count file;
         the selector below decides which dataset parameter is shown. -->
    <conditional name="abundance_type">
        <param name="abundance_type_selected"
               type="select"
               label="Abundance type"
               help="Select the type of file where the abundance of each sequence by sample is stored.">
            <option value="biom" selected="true">BIOM file</option>
            <option value="count">TSV file</option>
        </param>

        <!-- BIOM variant (default selection). -->
        <when value="biom">
            <param name="abundance_biom"
                   type="data"
                   format="biom1"
                   optional="false"
                   label="Abundance file"
                   help="It contains the count by sample for each sequence." />
        </when>

        <!-- Tabular (TSV) variant. -->
        <when value="count">
            <param name="abundance_count"
                   type="data"
                   format="tabular"
                   optional="false"
                   label="Count file"
                   help="It contains the count by sample for each sequence (see below)." />
        </when>
    </conditional>
</inputs>
<outputs>
    <!-- Sequences output (FASTA). -->
    <data name="non_chimera_fasta"
          format="fasta"
          label="${tool.name}: non_chimera.fasta"
          from_work_dir="non_chimera.fasta"/>

    <!-- Abundance output: exactly one of the two datasets below is created,
         depending on the abundance type selected in the inputs. -->
    <data name="out_abundance_biom"
          format="biom1"
          label="${tool.name}: non_chimera_abundance.biom"
          from_work_dir="non_chimera_abundance.biom">
        <filter>abundance_type['abundance_type_selected'] == "biom"</filter>
    </data>
    <data name="out_abundance_count"
          format="tabular"
          label="${tool.name}: non_chimera_abundance.tsv"
          from_work_dir="non_chimera_abundance.tsv">
        <filter>abundance_type['abundance_type_selected'] == "count"</filter>
    </data>

    <!-- HTML summary report. -->
    <data name="summary_file"
          format="html"
          label="${tool.name}: report.html"
          from_work_dir="report.html"/>
</outputs>
<tests>
    <!-- BIOM mode: run on the clustering reference data and compare the FASTA output
         with the expected reference file. -->
    <test>
        <param name="sequence_file" value="references/02-clustering.fasta"/>
        <conditional name="abundance_type">
            <param name="abundance_type_selected" value="biom"/>
            <param name="abundance_biom" value="references/02-clustering.biom"/>
        </conditional>
        <output name="non_chimera_fasta" file="references/03-chimera.fasta"/>
    </test>
</tests>
<!--
    Tool help, written in reStructuredText and rendered by Galaxy.
    The text is wrapped in CDATA because RST hyperlink targets use literal "<...>", which is
    not allowed unescaped in XML element content. Whitespace inside the CDATA section is
    significant: RST blocks must start at column 0, and directive options / literal blocks
    must stay indented.
    NOTE(review): the contact e-mail was garbled in the available copy of this file and was
    restored as frogs@inra.fr - confirm against the upstream FROGS-wrappers repository.
-->
<help><![CDATA[
.. image:: static/images/frogs_images/FROGS_logo.png
   :height: 144
   :width: 110


.. class:: infomark page-header h2

What it does

Remove chimeric sequences by sample.


.. class:: infomark page-header h2

Context

Chimeras are sequences formed from two or more biological sequences joined together.

The majority of these anomalous sequences are formed from an incomplete extension during a PCR cycle. During subsequent cycles, a partially extended strand can bind to a template derived from a different but similar sequence.

This phenomenon is particularly common in amplicon sequencing where closely related sequences are amplified.


.. class:: infomark page-header h2

Inputs/Outputs


.. class:: h3

Inputs

**Sequence file**:

The sequences (format `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`_).

**Abundance file**:

The abundance of each cluster in each sample (format `BIOM <http://biom-format.org/>`_).

OR

The abundance of each sequence in each sample (format `TSV <https://en.wikipedia.org/wiki/Tab-separated_values>`_). This type of file is produced by *FROGS pre-process*.

Example::

    #id     splA    splB
    seq1    1289    2901
    seq2    3415    0


.. class:: h3

Outputs

**Sequence file** (non_chimera.fasta):

The sequence file with only non-chimera (format `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`_).

**Abundance file** (non_chimera_abundance.biom or non_chimera_abundance.tsv):

The abundance file with only non-chimera (same format as the abundance input).

**Summary file** (report.html):

This file presents the number of removed elements (format `HTML <https://en.wikipedia.org/wiki/HTML>`_).


.. class:: infomark page-header h2

How it works

.. csv-table::
   :header: "Steps", "Description"
   :widths: 10, 90
   :class: table table-striped

   "1", "Split input data by sample (classically, the PCR is performed sample by sample)."
   "2", "Find chimera in each sample (`vsearch <https://github.com/torognes/vsearch>`_)."
   "3", "Remove the sequences identified as chimera in all samples where they are present."


----

**Contact**

Contacts: frogs@inra.fr

Repository: https://github.com/geraldinepascal/FROGS

Website: http://frogs.toulouse.inra.fr/

Please cite the **FROGS article**: *Escudie F., et al. Bioinformatics, 2018. FROGS: Find, Rapidly, OTUs with Galaxy Solution.*
]]></help>
<!-- Publication to cite when using this tool (FROGS article, referenced by DOI). -->
<citations>
    <citation type="doi">10.1093/bioinformatics/btx791</citation>
</citations>
</tool>