Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/usegalaxy-au/tools-au
Browse files Browse the repository at this point in the history
  • Loading branch information
neoformit committed Jun 25, 2024
2 parents 55772b6 + 8c6af15 commit c5aebed
Show file tree
Hide file tree
Showing 16 changed files with 635 additions and 8 deletions.
150 changes: 148 additions & 2 deletions tools/cellranger/cellranger.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<command><![CDATA[
#import re
#import os
#set tool_type = $tool_cond.tool
#if $tool_type == "count":
Expand All @@ -21,7 +22,30 @@
#end for
#elif str($tool_cond.tool) == "mkgtf":
#set attributeTag ="--attribute"
#set attribute=str(" ".join(["%s%s" % (str("--attribute=gene_biotype:"), str(ft)) for ft in $tool_cond.attributes]))
#set attribute=str(" ".join(["%s%s" % (str("--attribute=gene_biotype:"), str(ft)) for ft in $tool_cond.attributes]))
#elif str($tool_cond.tool) == "multi":
mkdir output_directory &&
#set input_directory='input_data_directory'
touch tmp &&
#for $input in $tool_cond.inputs:
#set sample_name_directory=$input.input_collection.element_identifier
#set lib_type = $input.type
#set sample_fastq_directory = '/'.join([str("/"),str($input_directory),str($sample_name_directory)])
#if str($input.lanes.lane_source.lane_source_selector) == "user_define":
#set num_lanes = "%s" % ("|".join($input.lanes.lane_source.lane))
#set library_record=','.join([str($sample_name_directory),str($sample_fastq_directory),str($num_lanes),str($lib_type)])
echo '$library_record' >> tmp &&
#else:
#set library_record=','.join([str($sample_name_directory),str($sample_fastq_directory),str(""),str($lib_type)])
echo '$library_record' >> tmp &&
#end if
mkdir -p $input_directory/$sample_name_directory &&
#set collection_identifier = re.sub('[^\s\w\-]', '_', str($input.input_collection.element_identifier))
#for $f in $input.input_collection:
#set identifier = re.sub('[^\s\w\-\\.]','_',str($f.element_identifier))
ln -sf '$f' "\$(pwd)"/$input_directory/$sample_name_directory/$identifier &&
#end for
#end for
#end if
#if str($tool_cond.tool) == "count"
Expand Down Expand Up @@ -60,15 +84,69 @@
2>&1
#elif str($tool_cond.tool) == "mkgtf"
cellranger mkgtf $raw_gtf $filtered_gtf $attribute
#elif str($tool_cond.tool) == "multi"
cp '$multi_config' 'config.txt' &&
cat tmp >> 'config.txt' &&
sed -i "s|input_data_directory|`pwd`/input_data_directory|g" config.txt &&
cellranger multi --id=output_directory
--csv=config.txt
--localcores=\${GALAXY_SLOTS:-2}
--localmem=\${GALAXY_MEMORY_GB:-8}
--disable-ui
&& gunzip -f output_directory/outs/per_sample_outs/output_directory/count/sample_filtered_feature_bc_matrix/matrix.mtx.gz
&& gunzip -f output_directory/outs/per_sample_outs/output_directory/count/sample_filtered_feature_bc_matrix/features.tsv.gz
&& gunzip -f output_directory/outs/per_sample_outs/output_directory/count/sample_filtered_feature_bc_matrix/barcodes.tsv.gz
&& rm tmp
&& 2>&1
#end if
]]></command>
<configfiles>
<configfile name="multi_config"><![CDATA[
#import re
#set $lib_type_multi = list()
#for $input in $tool_cond.inputs:
#set lib_type = $input.type
#if str($lib_type) not in $lib_type_multi:
$lib_type_multi.append('%s' %(str($input.type)))
#end if
#if str($lib_type) == "Gene Expression":
#set selected_gex_ref = $input.reference_source.ref_file.fields.path
#set has_bam = $tool_cond.GEX.no_bam
#elif str($lib_type) == "VDJ":
#set selected_vdj_ref = $input.reference_source.ref_file.fields.path
#else:
#set selected_gex_ref = $input.reference_source.ref_file.fields.path
#set has_bam = $tool_cond.GEX.no_bam
#set selected_vdj_ref = $input.reference_source.ref_file.fields.path
#end if
#end for
#if str("Gene Expression") in $lib_type_multi and str("VDJ") in $lib_type_multi:
[gene-expression]
reference, ${selected_gex_ref}
no-bam,${has_bam}
[vdj]
reference, ${selected_vdj_ref}
#elif str($lib_type) == "Gene Expression":
[gene-expression]
reference, ${selected_gex_ref}
no-bam,${has_bam}
#elif str($lib_type) == "VDJ" or "Gene Expression" not in $lib_type_multi:
[vdj]
reference, ${selected_vdj_ref}
#end if
[libraries]
fastq_id,fastqs,lanes,feature_types,subsample_rate
]]></configfile>
</configfiles>
<inputs>
<conditional name="tool_cond">
<param name="tool" type="select" label="Select a CellRanger tool" help="CellRanger tool: count, mkref or mkgtf.">
<param name="tool" type="select" label="Select a CellRanger tool" help="CellRanger tool: count, mkref, mkgtf, multi.">
<option value="count" selected="True">count</option>
<option value="mkref">mkref</option>
<option value="mkgtf">mkgtf</option>
<option value="multi">multi</option>
</param>
<when value="count">
<param name="input_collection" type="data_collection" format="fastq.gz,fastqsanger.gz,fastq" collection_type="list" label="Input Collection" help="A list of paired-end FASTQ files in a collection."/>
Expand Down Expand Up @@ -101,6 +179,25 @@
<expand macro="feature_type"/>
</param>
</when>
<when value="multi">
    <!-- One repeat entry per sequencing library. The command block turns each
         entry into one row of the [libraries] table appended to config.txt. -->
    <repeat name="inputs" title="Input Collections" min="1">
        <param name="input_collection" type="data_collection" format="fastq.gz,fastqsanger.gz,fastq" collection_type="list" label="Input Collection"/>
        <param name="type" type="select" label="library type" multiple="false" help="Select library type.">
            <expand macro="library_type"/>
        </param>
        <section name="lanes">
            <expand macro="number_of_lane"/>
        </section>
        <expand macro="db_reference"/>
    </repeat>
    <!-- Options shared by all libraries; section title typo ("Expresion") fixed. -->
    <section name="GEX" title="Gene Expression Options">
        <expand macro="gene_expression_options"/>
        <expand macro="chemistry"/>
    </section>
    <section name="VDJ" title="VDJ options">
        <expand macro="vdj"/>
    </section>
</when>
</conditional>
</inputs>
<outputs>
Expand Down Expand Up @@ -131,6 +228,38 @@
<data name="tar_ref_output" format="tgz" label="${tool.name} on ${on_string}: A tarball of the custom reference">
<filter>tool_cond['tool'] == 'mkref'</filter>
</data>
<data format="html" name="output_summary" label="Summary from ${tool.name} on ${on_string}" from_work_dir="output_directory/outs/per_sample_outs/output_directory/web_summary.html" >
<filter>tool_cond['tool'] == 'multi'</filter>
</data>
<data format="binary" name="cloupe" label="Cloupe file from ${tool.name} on ${on_string}" from_work_dir="output_directory/outs/per_sample_outs/output_directory/count/sample_cloupe.cloupe">
<filter>tool_cond['tool'] == 'multi'</filter>
</data>
<data format="mtx" name="matrix" label="Matrix file from ${tool.name} on ${on_string}" from_work_dir="output_directory/outs/per_sample_outs/output_directory/count/sample_filtered_feature_bc_matrix/matrix.mtx">
<filter>tool_cond['tool'] == 'multi'</filter>
</data>
<data format="tabular" name="feature" label="Feature file from ${tool.name} on ${on_string}" from_work_dir="output_directory/outs/per_sample_outs/output_directory/count/sample_filtered_feature_bc_matrix/features.tsv">
<filter>tool_cond['tool'] == 'multi'</filter>
</data>
<data format="tabular" name="barcode" label="Barcode file from ${tool.name} on ${on_string}" from_work_dir="output_directory/outs/per_sample_outs/output_directory/count/sample_filtered_feature_bc_matrix/barcodes.tsv">
<filter>tool_cond['tool'] == 'multi'</filter>
</data>
<data format="txt" name="multi_config_out" from_work_dir="config.txt" label="${tool.name} on ${on_string}: config">
<filter>tool_cond['tool'] == 'multi'</filter>
</data>
<collection name="multi_output" type="list" label="${tool.name} on ${on_string}: multi">
<discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;h5)" directory="output_directory/outs/per_sample_outs/output_directory/count" format="h5" visible="false" />
<discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;bam)" directory="output_directory/outs/per_sample_outs/output_directory/count" format="bam" visible="false" />
<filter>tool_cond['tool'] == 'multi' and tool_cond['GEX']['no_bam']</filter>
</collection>
<!-- VDJ result files discovered from the per-sample vdj_b output directory. -->
<collection name="vdj_output" type="list" label="${tool.name} on ${on_string}: multi vdj output">
    <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;vloupe)" directory="output_directory/outs/per_sample_outs/output_directory/vdj_b" format="binary" visible="false" />
    <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;tsv)" directory="output_directory/outs/per_sample_outs/output_directory/vdj_b" format="tsv" visible="false" />
    <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;csv)" directory="output_directory/outs/per_sample_outs/output_directory/vdj_b" format="csv" visible="false" />
    <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fastq)" directory="output_directory/outs/per_sample_outs/output_directory/vdj_b" format="fastq" visible="false" />
    <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fasta)" directory="output_directory/outs/per_sample_outs/output_directory/vdj_b" format="fasta" visible="false" />
    <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;bam)" directory="output_directory/outs/per_sample_outs/output_directory/vdj_b" format="bam" visible="false" />
    <!-- "inputs" is a <repeat>, so it cannot be indexed with ['type'] directly;
         check every repeat entry instead.
         NOTE(review): assumes repeat values reach the filter as a list of
         per-entry dicts - confirm against the Galaxy version in use. -->
    <filter>tool_cond['tool'] == 'multi' and any(entry['type'] == "VDJ" for entry in tool_cond['inputs'])</filter>
</collection>
</outputs>

<tests>
Expand All @@ -151,6 +280,7 @@ Cell Ranger is a set of analysis pipelines that process Chromium single cell dat
- count : aligns sequencing reads in FASTQ files to a reference transcriptome
- mkref : build a custom reference
- mkgtf : filter GTF files with the feature attributes (i.e gene_biotype:protein_coding)
- multi : tool for analyzing 3' Cell Multiplexing data
**CellRanger Count**
Expand Down Expand Up @@ -197,12 +327,28 @@ Cell Ranger is a set of analysis pipelines that process Chromium single cell dat
- A filtered reference genome GTF file
**CellRanger multi**
**Input**
1) A single-cell gene expression dataset, supplied as a collection named after the sample. For example, if a gene expression FASTQ file is named sc5p_v2_hs_B_1k_5gex_S1_L001_I1_001.fastq.gz, the collection should be named sc5p_v2_hs_B_1k_5gex (the sample-name prefix of the FASTQ file name).
2) A single-cell VDJ dataset, supplied as a collection named after the sample. For example, if a VDJ FASTQ file is named sc5p_v2_hs_B_1k_b_S1_L001_I1_001.fastq.gz, the collection should be named sc5p_v2_hs_B_1k_b (the sample-name prefix of the FASTQ file name).
3) Both 1) and 2) together.
**Output**
1) Gene expression - A summary file in html format, two h5 files, Cloupe, barcode, feature and a matrix file.
2) VDJ - Vloupe (Cellranger supported file format), clonotypes, airr_rearrangement, consensus_annotations, filtered_contig_annotations, filtered_contig, concat_ref and consensus.
3) Both output 1 and output 2 will be generated only if both gene expression and VDJ are used as an input.
.. class:: infomark
**More Information**
- `CellRanger`: https://support.10xgenomics.com/docs/citations
- `Output` : see more https://www.10xgenomics.com/support/software/cell-ranger/latest/analysis/outputs/cr-5p-outputs-overview-vdj
**Citations for 10x Genomics Publications**
]]></help>
Expand Down
80 changes: 79 additions & 1 deletion tools/cellranger/macros.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<macros>
<token name="@TOOL_VERSION@">7.1.0</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@PROFILE@">22.05</token>
<token name="@VERSION@">7.1.0</token>
<xml name="requirements">
Expand All @@ -17,6 +17,38 @@
<xml name="attribute_option" token_value="default">
<option value="@VALUE@" selected="true">@VALUE@</option>
</xml>
<!-- Reference genome chooser: either a built-in index from the cellranger_db
     data table, or a CellRanger-formatted tarball from the user's history. -->
<xml name="db_reference">
    <conditional name="reference_source">
        <param name="reference_source_selector" type="select" label="Will you select a reference genome from your history or use a built-in index?">
            <option value="cached">Use a built-in genome index</option>
            <option value="history">Use a genome from history and build index</option>
        </param>

        <!-- Built-in index: options come from the data table, sorted on column 2. -->
        <when value="cached">
            <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
                <options from_data_table="cellranger_db">
                    <filter type="sort_by" column="2"/>
                    <validator type="no_options" message="No reference genomes are available"/>
                </options>
                <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
            </param>
        </when>

        <!-- User-supplied reference tarball. -->
        <when value="history">
            <param name="own_ref_file" type="data" format="tgz" hierarchy="recurse" label="Use the following dataset as the reference sequence" help="You can upload a tarball formatted in CellRanger format as reference"/>
        </when>
    </conditional>
</xml>
<!-- Assay chemistry selector; "auto" is the default choice. -->
<xml name="chemistry">
    <param name="chemistry_list" type="select" label="Select chemistry">
        <option value="auto" selected="true">auto</option>
        <option value="threeprime">Single Cell 3'</option>
        <option value="fiveprime">Single Cell 5'</option>
        <option value="SC3Pv1">Single Cell 3' v1</option>
        <option value="SC3Pv2">Single Cell 3' v2</option>
        <option value="SC5P-PE">SC5P-PE</option>
        <option value="SC5P-R2">SC5P-R2 for R2-only</option>
        <option value="SC-FB">SC-FB for Single Cell AntiBody-only</option>
    </param>
</xml>
<xml name="feature_type">
<expand macro="attribute_option" value="protein_coding"/>
<expand macro="attribute_option" value="lncRNA"/>
Expand All @@ -35,4 +67,50 @@
<expand macro="attribute_option" value="TR_V_pseudogene"/>
<expand macro="attribute_option" value="TR_J_pseudogene"/>
</xml>
<!-- Library (feature) types offered for each input collection. The selected
     value is written verbatim into the feature_types column of the
     [libraries] table, so each value must match Cell Ranger's spelling. -->
<xml name="library_type">
    <expand macro="attribute_option" value="Gene Expression"/>
    <expand macro="attribute_option" value="VDJ"/>
    <expand macro="attribute_option" value="VDJ-T"/>
    <expand macro="attribute_option" value="VDJ-T-GD"/>
    <expand macro="attribute_option" value="VDJ-B"/>
    <expand macro="attribute_option" value="Antibody Capture"/>
    <!-- NOTE(review): confirm whether Cell Ranger expects "Antigen Capture"
         without the "(BEAM)" suffix in the config CSV. -->
    <expand macro="attribute_option" value="Antigen Capture (BEAM)"/>
    <!-- Was "CRISP Guide Capture"; the feature type is "CRISPR Guide Capture". -->
    <expand macro="attribute_option" value="CRISPR Guide Capture"/>
</xml>
<!-- Parameters for the [gene-expression] section of the cellranger multi config. -->
<xml name="gene_expression_options">
    <param name="no_target_umi_filter" type="boolean" truevalue="true" falsevalue="false" checked="False" label="No target umi filter"/>
    <param name="r1_length" type="text" optional="True" label="R1 length" help=""/>
    <param name="r2_length" type="text" optional="True" label="R2 length" help=""/>
    <param name="expect_cells" type="text" optional="True" label="Expect cells" help=""/>
    <param name="force_cells" type="text" optional="True" label="Force cells" help=""/>
    <param name="include_introns" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Include introns"/>
    <param name="no_secondary" type="boolean" truevalue="true" falsevalue="false" checked="True" label="No Secondary"/>
    <!-- checked="Talse" was not a valid boolean; "False" keeps the effective
         default (unchecked). The value is emitted as "no-bam,<value>" in the
         config file, so checking the box disables BAM output; the label now
         says so instead of the inverted "create bam file". -->
    <param name="no_bam" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Do not create BAM file (no-bam)"/>
    <param name="check_library_compatibility" type="boolean" truevalue="true" falsevalue="false" checked="True" label="Check library compatibility"/>
</xml>
<!-- Optional VDJ parameters (all free-text, all optional). -->
<xml name="vdj">
    <!-- Label typo fixed: "pimers" -> "primers". -->
    <param name="inner_enrichment_primers" type="text" optional="True" label="Inner enrichment primers"/>
    <param name="r1_length" type="text" optional="True" label="R1 length"/>
    <param name="r2_length" type="text" optional="True" label="R2 length"/>
</xml>
<!-- Lane selection: use every lane (default) or an explicit multi-select of lanes 1-6. -->
<xml name="number_of_lane">
    <conditional name="lane_source">
        <param name="lane_source_selector" type="select" label="Select number of lanes for your dataset?">
            <option value="default">Default: all lanes</option>
            <option value="user_define">User define</option>
        </param>

        <!-- No extra parameters when all lanes are used. -->
        <when value="default"/>

        <when value="user_define">
            <param name="lane" type="select" multiple="true" label="Select a list of lanes">
                <option value="1">Lane 1</option>
                <option value="2">Lane 2</option>
                <option value="3">Lane 3</option>
                <option value="4">Lane 4</option>
                <option value="5">Lane 5</option>
                <option value="6">Lane 6</option>
            </param>
        </when>
    </conditional>
</xml>
</macros>
23 changes: 23 additions & 0 deletions tools/dorado/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
---
# Tool Shed repository metadata for the dorado wrapper (tools/dorado/.shed.yml).
# Templates applied to the per-tool repositories.
auto_tool_repositories:
name_template: "{{ tool_id }}"
description_template: "{{ tool_name }} from the dorado suite"
categories:
- Sequence Analysis
description: Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads.
# Test-report artefacts listed for exclusion.
exclude:
- tool_test_output.html
- tool_test_output.json
homepage_url: https://github.com/nanoporetech/dorado
long_description: >
Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads.
name: dorado
owner: galaxy-australia
remote_repository_url: https://github.com/usegalaxy-au/tools-au/tree/main/tools/dorado
# Suite definition for the dorado tools.
suite:
name: suite_dorado
description: >
Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads.
long_description: >
Dorado is a high-performance, easy-to-use, open source basecaller for Oxford Nanopore reads.
type: unrestricted
48 changes: 48 additions & 0 deletions tools/dorado/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@

## Tool versions

Dorado is distributed on
[DockerHub](https://hub.docker.com/r/nanoporetech/dorado/tags) by nanoporetech.
The containers are identified by sha256 hash, but not tagged with a version.

We can still use the containers and display the dorado version by hard-coding
both dorado version and container hash into the wrapper (see `macros.xml`).
Unfortunately you have to pull a >6 GB container and run `dorado --version` just
to check the tool version. This also prevents auto-updates of this wrapper.

You can update the list of models at the same time (see
below). **You must do this when you update the wrapper**.

## Basecalling models

The models are bundled in the container at `/models` and made available by the
`dorado_models.loc` file.

The columns are `value`, `container_hash`, `name` and `path`.

To update the list, modify `tool-data/dorado_models.loc.sample`.

Because models can be added and removed, models are listed **per container** in
the loc file.

Here's some code to **append** the models from the container with hash
`1c65eb070a9fc1d88710c4dc09b06541f96fdd28` to the loc file.

```bash
export DORADO_HASH="1c65eb070a9fc1d88710c4dc09b06541f96fdd28"

apptainer exec "docker://nanoporetech/dorado:sha${DORADO_HASH}" \
ls /models | \
awk -v hash="${DORADO_HASH}" '{print hash "_" $0 "\t" hash "\t" $0 "\t/models/" $0}' \
>> tool-data/dorado_models.loc.sample
```

The loc file doesn't have a header, so you can keep it sorted.

```bash
cp tool-data/dorado_models.loc.sample \
tool-data/dorado_models.loc.sample.old &&
sort -t$'\t' -k1,1V tool-data/dorado_models.loc.sample.old \
> tool-data/dorado_models.loc
```

Loading

0 comments on commit c5aebed

Please sign in to comment.