From 5c8f294da4f30080d8650ce67c7e561fd6420856 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@fgenesh-galaxy.novalocal>
Date: Thu, 18 Jan 2024 06:35:19 +0000
Subject: [PATCH] update and split fgenesh tools

---
 tools/fgenesh/fgenesh_annotate.xml     | 295 +++++++++++++++++++++++++
 tools/fgenesh/fgenesh_get_mrnas_gc.xml |  80 +++++++
 tools/fgenesh/fgenesh_merge.xml        |  87 ++++++++
 tools/fgenesh/fgenesh_split.xml        |  93 ++++++++
 tools/fgenesh/fgenesh_to_genbank.xml   | 105 +++++++++
 tools/fgenesh/macros.xml               |   3 +-
 6 files changed, 661 insertions(+), 2 deletions(-)
 create mode 100644 tools/fgenesh/fgenesh_annotate.xml
 create mode 100644 tools/fgenesh/fgenesh_get_mrnas_gc.xml
 create mode 100644 tools/fgenesh/fgenesh_merge.xml
 create mode 100644 tools/fgenesh/fgenesh_split.xml
 create mode 100644 tools/fgenesh/fgenesh_to_genbank.xml
diff --git a/tools/fgenesh/fgenesh_annotate.xml b/tools/fgenesh/fgenesh_annotate.xml
new file mode 100644
index 00000000..346e6a82
--- /dev/null
+++ b/tools/fgenesh/fgenesh_annotate.xml
@@ -0,0 +1,295 @@
+<tool id="fgenesh_annotate" name="FGENESH annotate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" >
+    <description>sequences</description>
+    <macros>
+	  <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command><![CDATA[
+        mkdir -p result &&
+        mkdir -p output_gff &&
+        BLAST_PATH=\$(which blastp) > configfile &&
+        echo "BLASTP = \$BLAST_PATH" >> configfile &&
+        echo "BLAST2 = \$BLAST_PATH" >> configfile &&
+        echo "NUM_THREADS = \${GALAXY_SLOTS:-4}"  >> configfile &&
+        cat '$cfg' >> configfile &&
+        ## Data preparation: link every input under its element identifier, add it to seqlist and write a one-entry <identifier>.list file
+        #if $inputs.input_type == 'single':
+            #for $input in $inputs.single_seq
+                ln -fs '$input' '$input.element_identifier' &&
+                echo "\$(pwd)"/'$input.element_identifier' >> seqlist &&
+                echo "\$(pwd)"/'$input.element_identifier' > '$input.element_identifier'.list &&
+            #end for
+            sort seqlist > sorted_seqlist &&
+            #if $repeat_sequence.selector == 'single_masked_seq':
+                #for $seq in $repeat_sequence.masked_seq_single
+                    ln -fs '$seq' '$seq.element_identifier' &&
+                    echo "\$(pwd)"/'$seq.element_identifier' >> seqlistN &&
+                    echo "\$(pwd)"/'$seq.element_identifier' > '$seq.element_identifier'.list &&
+                #end for
+                sort seqlistN > sorted_seqlistN &&
+            #end if
+        #elif $inputs.input_type == 'multiple':
+            #for $input in $inputs.multiple_seq
+                ln -fs '$input' '$input.element_identifier' &&
+                echo "\$(pwd)"/'$input.element_identifier' >> seqlist &&
+                echo "\$(pwd)"/'$input.element_identifier' > '$input.element_identifier'.list &&
+            #end for
+            ## the run loops below read sorted_seqlist, so it has to exist for collection input as well
+            sort seqlist > sorted_seqlist &&
+            #if $repeat_sequence.selector == 'multiple_masked_seq':
+                #for $mseq in $repeat_sequence.masked_seq_multiple
+                    ln -fs '$mseq' '$mseq.element_identifier' &&
+                    echo "\$(pwd)"/'$mseq.element_identifier' >> seqlistN_temp &&
+                    paste seqlistN_temp seqlist | sort | cut -f1 > seqlistN && ## sort the filenames in seqlistN to keep the same order as the filenames in seqlist
+                    ## prep for parallel command
+                    echo "\$(pwd)"/'$mseq.element_identifier' > '$mseq.element_identifier'.list &&
+                #end for
+            #end if
+        #end if
+
+        ## Run stage: one run_pipe.pl job per sequence through GNU parallel, then collect the results and convert them to GFF3
+        #if $repeat_sequence.selector == 'no_repeat_seq':
+
+            for s in `cat sorted_seqlist`;
+            do
+                echo  "run_pipe.pl configfile -l '\$s'.list -d result_'\$(basename \$s)'";
+            done > fgenesh_parallel_command.sh &&
+
+            cat fgenesh_parallel_command.sh | parallel --will-cite -j "\${GALAXY_SLOTS:-10}" &&
+
+            ## single-call alternative: run_pipe.pl configfile -l seqlist -d result
+            mv result_*/* result/ &&
+            run_fgenesh_2_gff3.pl result output_gff -sort -print_exons 2>&1
+        #elif $repeat_sequence.selector == 'single_masked_seq':
+            ## -m is given <unmasked path>.N.list, so each masked dataset must be named <unmasked identifier>.N for its list file to be found
+            for s in `cat sorted_seqlist`;
+            do
+                echo  "run_pipe.pl configfile -l '\$s'.list -m '\$s'.N.list -d result_'\$(basename \$s)'";
+            done > fgenesh_parallel_command.sh &&
+
+            cat fgenesh_parallel_command.sh | parallel --will-cite -j "\${GALAXY_SLOTS:-10}" &&
+
+            ## single-call alternative: run_pipe.pl configfile -l seqlist -m seqlistN -d result
+            mv result_*/* result/ &&
+            run_fgenesh_2_gff3.pl result output_gff -sort -print_exons 2>&1
+        #elif $repeat_sequence.selector == 'multiple_masked_seq':
+            for s in `cat seqlist`;
+            do
+                echo  "run_pipe.pl configfile -l '\$s'.list -m '\$s'.N.list -d result_'\$(basename \$s)'";
+            done > fgenesh_parallel_command.sh &&
+
+            cat fgenesh_parallel_command.sh | parallel --will-cite -j "\${GALAXY_SLOTS:-10}" &&
+            ## single-call alternative: run_pipe.pl configfile -l seqlist -m seqlistN -d result
+            mv result_*/* result/ &&
+            run_fgenesh_2_gff3.pl result output_gff -sort -print_exons 2>&1
+        #end if
+
+    ]]></command>
+    <configfiles>
+        <configfile name="cfg"><![CDATA[
+## indexed: path column of the data table entry; history: the selected dataset (a history NR database needs a matching .ind file next to it - TODO confirm)
+#set $gene_param = $matrix_type.species_matrix.fields.path if $matrix_type.matrix_type_selector == 'indexed' else $matrix_type.own_file
+#set $pipe_param = $db_type.genome_type.fields.path if $db_type.db_type_selector == 'indexed' else $db_type.own_file
+#set $protein_db = $nr_type.nr_db.fields.path if $nr_type.nr_type_selector == 'indexed' else $nr_type.own_file
+GENE_PARAM = ${gene_param}
+PIPE_PARAM = ${pipe_param}
+PREDICT_GC = ${predict_gc}
+MAP_mRNAs = ${map_mrna.mRNAs}
+#if $map_mrna.mRNAs != '0'
+CDNA_FILE = ${map_mrna.cdna_file}
+PROT_FILE = ${map_mrna.prot_file}
+DAT_FILE = ${map_mrna.dat_file}
+#end if
+MAP_ESTS = ${map_est.ESTs}
+#if $map_est.ESTs != '0'
+EST_FILE = ${map_est.est_file}
+#end if
+USE_READS = ${use_reads}
+DIR_SITES = na
+PROG_PROT = ${use_proteins}
+USE_PROTEINS = ${use_proteins}
+PROTEIN_DB = ${protein_db}
+PROTEIN_DB_INDEX = ${protein_db}.ind
+PROTEIN_DB_TAG = NR
+BLAST_AI_PROTEINS =  ${find_homologs} # find homologs for ab initio predicted genes ( 0 - no , 1 - yes)
+INTRONIC_GENES = ${intronic_genes} 
+             ]]></configfile>
+    </configfiles>
+    <inputs>
+	    <conditional name="inputs">
+                <param name="input_type" type="select" label="Input type" help="Select single sequence or collection of sequence">
+			<option value="single" selected="true">Single sequence</option>
+			<option value="multiple">Multiple sequences</option>
+		</param>
+		<when value="single">
+			<param name="single_seq" format="fasta" type="data" label="Single sequence" help="Single sequence" multiple="true"/>
+		</when>
+		<when value="multiple">
+			<param name="multiple_seq" format="fasta" type="data_collection" collection_type="list" label="Multiple sequence"/>
+		</when>
+	    </conditional>
+            <conditional name="repeat_sequence">
+		    	<param name="selector" type="select" label="Use repeat masking sequence" help="Enable this option if you want to use repeat masked sequences .">
+			<option value="no_repeat_seq" selected="true">No repeat sequence</option>
+                	<option value="single_masked_seq">Single masked sequence</option>
+                	<option value="multiple_masked_seq">Multiple masked sequences</option>
+            	</param>
+		<when value="single_masked_seq">
+                      <param name="masked_seq_single" format="fasta" type="data" label="repeat masked sequence" help="Single masked sequence" multiple="true"/>
+                </when>
+		<when value="multiple_masked_seq">
+		      <param name="masked_seq_multiple" format="fasta" type="data_collection" collection_type="list" label="repeat masked sequence" help="Multiple repeat sequence"/>
+	      	</when>
+		<when value="no_repeat_seq"></when>
+            </conditional>
+            <conditional name="matrix_type">
+                  <param name="matrix_type_selector" type="select" label="Select matrix type" help="Select matrix for your species">
+                         <option value="indexed" selected="true">Use a built-in index</option>
+                         <option value="history">Use one from the history</option>
+                  </param>
+                  <when value="indexed">
+                         <param name="species_matrix" type="select" label="Select a species matrix" help="If your species of interest is not listed, contact your Galaxy admin">
+                          <options from_data_table="fgenesh_matrix">
+                              <filter type="sort_by" column="2"/>
+                              <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+                           </options>
+                         </param>
+                  </when>
+                  <when value="history">
+                       <param name="own_file" type="data" format="txt" label="Select species matrix" />
+                  </when>
+            </conditional>
+            <conditional name="db_type">
+		  <param name="db_type_selector" type="select" label="Select db type" help="Select Mammal DB / Non Mammal DB">
+                         <option value="indexed" selected="true">Use a built-in index</option>
+                         <option value="history">Use one from the history</option>
+                  </param>
+                  <when value="indexed">
+                         <param name="genome_type" type="select" label="Select a reference database" help="If your database of interest is not listed, contact your Galaxy admin">
+                          <options from_data_table="fgenesh_db">
+                              <filter type="sort_by" column="2"/>
+                              <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+                           </options>
+                         </param>
+                  </when>
+                  <when value="history">
+                       <param name="own_file" type="data" format="txt" label="Select reference database" />
+                  </when>
+	  </conditional>
+            <conditional name="nr_type">
+                  <param name="nr_type_selector" type="select" label="Select nr db type" help="Select NR database">
+                         <option value="indexed" selected="true">Use a built-in index</option>
+                         <option value="history">Use one from the history</option>
+                  </param>
+                  <when value="indexed">
+                         <param name="nr_db" type="select" label="Select a NR database" help="If your database of interest is not listed, contact your Galaxy admin">
+                          <options from_data_table="fgenesh_nr">
+                              <filter type="sort_by" column="2"/>
+                              <validator type="no_options" message="No indexes are available for the selected input dataset"/>
+                           </options>
+                         </param>
+                  </when>
+                  <when value="history">
+                       <param name="own_file" type="data" format="txt" label="Select reference database" />
+                  </when>
+           </conditional>
+           <conditional name="map_mrna"><!-- param names match the CDNA_FILE / PROT_FILE / DAT_FILE keys written to the config file -->
+                  <param name="mRNAs" type="select" label="mRNAs" help="map known mRNA data to the genomic sequences">
+                        <option value="0">No</option>
+                        <option value="1">Yes</option>
+                  </param>
+                  <when value="1">
+                          <param name="cdna_file" type="data" format="fasta" label="cDNA file" help="cdna fasta file for known mRNAs"/>
+                          <param name="prot_file" type="data" format="fasta" label="Protein file" help="protein fasta file for known mRNAs"/>
+                          <param name="dat_file" type="data" format="txt" label="Dat file" help="dat file for known mRNAs"/>
+                  </when>
+                  <when value="0"/>
+           </conditional>
+           <conditional name="map_est">
+                  <param name="ESTs" type="select" label="ESTs" help="map ESTs to the genomic sequences">
+                        <option value="0">No</option>
+                        <option value="1">Yes</option>
+                  </param>
+                  <when value="1">
+                          <param name="est_file" type="data" format="fasta" label="ESTs file" help="fasta file with ESTs"/>
+                  </when>
+                  <when value="0"/>
+          </conditional>
+	  <param name="predict_gc" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Predict GC" help="predict genes with GC donor splice sites or not"/>
+	  <param name="use_reads" type="boolean" checked="false" truevalue="1" falsevalue="0" label="USE_READS" help="use reads info to improve gene models"/>
+	  <param name="find_homologs" type="boolean" checked="false" truevalue="1" falsevalue="0" label="Find homologs" help="find homologs for ab initio predicted genes"/>
+          <param name="use_proteins" type="boolean" checked="false" truevalue="1" falsevalue="0" label="USE_PROTEINS" help="Using known proteins for prediction"/>
+          <param name="intronic_genes" type="boolean" checked="false" truevalue="1" falsevalue="0" label="INTRONIC_GENES" help="predict genes in long introns of other genes"/>
+    </inputs>
+    <outputs>
+        <!-- Both collections are discovered after the run: raw FGENESH predictions (*.resn3) from
+             result/ and their GFF3 conversions (*.gff3) from output_gff/. The name group of each
+             pattern is everything in front of the literal file extension. -->
+        <collection name="multiple_annotation" type="list" label="${tool.name} on ${on_string}: multiple annotation">
+            <discover_datasets pattern="(?P&lt;name&gt;.*)\.resn3$" format="txt" directory="result"/>
+            <!--<filter>input['input_type'] == 'multiple'</filter>-->
+        </collection>
+        <collection name="annotated_gff3" type="list" label="${tool.name} on ${on_string}: GFF3">
+            <discover_datasets pattern="(?P&lt;name&gt;.*)\.gff3$" format="gff3" directory="output_gff"/>
+        </collection>
+    </outputs>
+	
+    <tests>
+	 <test>
+            <!-- #1 test -->
+        </test>
+    </tests>
+    <help><![CDATA[
+	    
+.. class:: infomark
+
+**What it does**
+	    
+*Fgenesh is a genome annotation tool*
+
+**Input**
+	    
+- input file  -  Genome or de novo assembly file in FASTA format and repeat masking fasta file  
+
+**Command line Example:**
+
+- FGENESHPIPE/run_pipe.pl  human_prj.cfg  -l seq_1.list  -m seq_1N.list  -d results_1
+
+- human_prj.cfg - a configuration file containing path to database, path gene matrix and the settings of the third party softwares
+- seq_1.list - a list of chromosome / scaffolds (unmasked)
+- seq_1N.list - a list of chromosome / scaffolds (masked)
+- results_1 - output folder
+
+
+**Parameters:**
+
+- matrix type - built-in index or select the index from the user history
+- species matrix - select the gene matrix that matches the species of your input genome if built-in index in the matrix type is selected
+- db type - built-in database or select the database from the user history
+- reference database - fgenesh comes with Mammal DB / Non Mammal DB if built-in database in the db type is selected
+- NR db type - built-in non-redundant database or select the non-redundant database from the user history
+- NR database - select the non-redundant database for your species
+- mRNAs - map known mRNA sequences to the genomic sequences (default: No). If Yes is selected, make sure .cdna, .pro and .dat files are available in the user history.
+- ESTs - map ESTs to the genomic sequences (default: No) if Yes is selected, make sure there is ESTs fasta file available in the user history
+- Predict GC - predict genes with GC donor splice sites or not (default:No)
+- USE_READS - use reads info to improve gene models (default:No)
+- Find homologs - find homologs for ab initio predicted genes (0 - no, 1 - yes)
+- USE_PROTEINS - Using known proteins for prediction (default: No)
+- INTRONIC_GENES - Predicting genes in long introns of other genes (default: No)
+
+**Output**
+
+- txt (resn3) - raw output produced by Fgenesh
+- gff3 - gff3 file format converted from the Fgenesh resn3 file
+
+ 
+.. class:: infomark
+
+**Contributor**
+
+Galaxy Australia wrapped the Fgenesh: the original software is available on this website `FGENESH <http://www.softberry.com/berry.phtml?topic=fgenesh_plus_plus&group=help&subgroup=pipelines>`_. Note: The software license needs to be acquired prior to use.
+
+	    
+	]]></help>
+</tool>
diff --git a/tools/fgenesh/fgenesh_get_mrnas_gc.xml b/tools/fgenesh/fgenesh_get_mrnas_gc.xml
new file mode 100644
index 00000000..6dc67039
--- /dev/null
+++ b/tools/fgenesh/fgenesh_get_mrnas_gc.xml
@@ -0,0 +1,80 @@
+<tool id="fgenesh_get_mrnas_gc" name="FGENESH get mRNA or GC" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" >
+    <description>resn3 and genomic file</description>
+    <macros>
+	  <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command><![CDATA[
+        set -o pipefail && ## without pipefail the exit status of tee would hide a failing get_mrnas_or_GC.pl
+        ln -s '$resn3_file' '$resn3_file.element_identifier' &&
+        ln -s '$sequence_file' '$sequence_file.element_identifier' &&
+        get_mrnas_or_GC.pl '$resn3_file.element_identifier' '$sequence_file.element_identifier' '$output_mrna_file' $CDS $GC -fix_id $fix_id_type 2>&1 | tee -a '$log'
+    ]]></command>
+    <inputs>
+        <param name="resn3_file" format="txt" type="data" label="Input Resn3 file"/>
+        <param name="sequence_file" format="fasta" type="data" label="Sequence file"/>
+        <param argument="-CDS" type="boolean" truevalue="-CDS" falsevalue="" checked="true" label="CDS only" help="create CDS fasta file" />
+        <param argument="-GC" type="boolean" truevalue="-GC" falsevalue="" checked="false" label="GC report" help="report GC donor splice sites" />
+        <param name="fix_id_type" type="select" label="fix header id in the output fasta file" help="Value passed to -fix_id. Default: Sequence No">
+            <option value="seq_No" selected="true">Sequence No</option>
+            <option value="seq_count">Sequence Count</option>
+            <option value="seq_name">Sequence Name</option>
+        </param>
+    </inputs>
+    <outputs>
+	    <data name="output_mrna_file" format="fasta"  label="${tool.name} on ${on_string}: mRNA file"/>
+	    <data name="log" format="txt" label="${tool.name} on ${on_string}: Report"/>
+    </outputs>
+	
+    <tests>
+	 <test>
+            <!-- #1 test -->
+        </test>
+    </tests>
+    <help><![CDATA[
+	    
+.. class:: infomark
+
+**What it does**
+	    
+*Fgenesh get_mrnas_or_GC.pl is a tool to extract CDS(mRNAs) sequences using the input file with Fgenesh/Fgenesh++ predictions*
+
+**Input**
+	    
+- resn3 file  -  input file with Fgenesh/Fgenesh++ predictions  
+
+- sequence file - input  file with genomic FASTA sequences
+
+
+**Command line Example:**
+
+- get_mrnas_or_GC.pl <resn3_file> <seq_file> <mrna_file> [-CDS] [-GC] [-fix_id seq_name | seq_No | seq_count]
+
+- mrna_file - output file with CDS sequences in fasta file format
+
+- CDS - CDS only
+
+- GC - report GC donor splice sites
+
+- fix_id 
+- 1) seq_name - use sequence names [example of ID: 'ENm002_gene_7'] 
+- 2) seq_No - (numbers are taken from 'Sequence: <No>' if such field is present, e.g., "Length of sequence: 1000000, Sequence: 2, File: encode_hg17_44N.fa") - [example of ID: 'seq_2_gene_7']
+- 3) seq_count - use numbers (count sequences starting from 1) [example of ID: 'seq_2_gene_7']
+	
+**Output**
+
+- mrna_file - output file with CDS sequences in fasta file format
+
+- report - a report file of CDS and GC
+
+ 
+.. class:: infomark
+
+**Contributor**
+
+Galaxy Australia wrapped the Fgenesh: the original software is available on this website `FGENESH <http://www.softberry.com/berry.phtml?topic=fgenesh_plus_plus&group=help&subgroup=pipelines>`_. Note: The software license needs to be acquired prior to use.
+
+	    
+	]]></help>
+</tool>
diff --git a/tools/fgenesh/fgenesh_merge.xml b/tools/fgenesh/fgenesh_merge.xml
new file mode 100644
index 00000000..566265f1
--- /dev/null
+++ b/tools/fgenesh/fgenesh_merge.xml
@@ -0,0 +1,87 @@
+<tool id="fgenesh_merge" name="FGENESH merge" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" >
+    <description>gff3 or resn3 file</description>
+    <macros>
+	  <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command><![CDATA[
+        ## Link the collection elements into a folder with the extension the merge script is given, then merge them into one dataset
+        #if $input.selector == "gff"
+            mkdir -p input_gff_files &&
+            #for $gff_file in $input.input_gff_collection
+                ln -s '$gff_file' input_gff_files/'${gff_file.element_identifier}.gff3' &&
+            #end for
+            merge_gff3.pl input_gff_files/ '$output_gff'
+        #else if $input.selector == "resn3"
+            mkdir -p input_resn3_files &&
+            #for $resn3_file in $input.input_resn3_collection
+                ln -s '$resn3_file' input_resn3_files/'${resn3_file.element_identifier}.resn3' &&
+                echo input_resn3_files/'${resn3_file.element_identifier}.resn3' >> resn3.list &&
+            #end for
+            merge_res_files.pl -l resn3.list -dir input_resn3_files/ -o '$output_resn3'
+        #end if
+
+    ]]></command>
+    <inputs>
+	    <conditional name="input">
+		<param name ="selector" type="select" label="Input file type (resn3/gff3) for merging">
+			<option value="gff">gff</option>
+			<option value="resn3">resn3</option>
+		</param>
+	   	<when value="gff">
+            		<param name="input_gff_collection" format="gff3" type="data_collection" collection_type="list" label="collection"/>
+		</when>
+		<when value="resn3">
+			<param name="input_resn3_collection" format="txt" type="data_collection" collection_type="list" label="collection"/>
+		</when>
+	    </conditional>
+    </inputs>
+    <outputs>
+	    <data name="output_gff" format="gff3"  label="${tool.name} on ${on_string}: Merged GFF">
+		    <filter> input['selector'] == "gff" </filter>
+	    </data>
+            <data name="output_resn3" format="txt"  label="${tool.name} on ${on_string}: Merged RESN3">
+                    <filter> input['selector'] == "resn3" </filter>
+            </data>
+    </outputs>
+	
+    <tests>
+	 <test>
+            <!-- #1 test -->
+        </test>
+    </tests>
+    <help><![CDATA[
+	    
+.. class:: infomark
+
+**What it does**
+	    
+*Fgenesh merge_gff3.pl and merge_res_files.pl merge multiple gff3 or resn3 files into a single gff3 or resn3 file*
+
+**Input**
+	    
+- input files  -  a collection of gff3 files from Fgenesh Genome Annotation  
+
+**Command line Example:**
+
+- merge_gff3.pl input_directory/ output.gff3
+
+- input_directory - a list of gff3 files in a directory or a collection in the Galaxy history
+	    
+- output.gff3 - merged gff3 file
+
+**Output**
+
+- gff3 - a single gff3 file
+
+ 
+.. class:: infomark
+
+**Contributor**
+
+Galaxy Australia wrapped the Fgenesh: the original software is available on this website `FGENESH <http://www.softberry.com/berry.phtml?topic=fgenesh_plus_plus&group=help&subgroup=pipelines>`_. Note: The software license needs to be acquired prior to use.
+
+	    
+	]]></help>
+</tool>
diff --git a/tools/fgenesh/fgenesh_split.xml b/tools/fgenesh/fgenesh_split.xml
new file mode 100644
index 00000000..2c0c5d7a
--- /dev/null
+++ b/tools/fgenesh/fgenesh_split.xml
@@ -0,0 +1,93 @@
+<tool id="fgenesh_split" name="FGENESH split" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" >
+    <description>fasta sequences</description>
+    <macros>
+	  <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command><![CDATA[
+        ## Split the input into contigs/; -ext is only passed when an explicit extension was selected
+        mkdir -p contigs &&
+        split_multi_fasta.pl '$input_fasta' -name $filename_format -dir contigs/
+        #if $output_file_extension != "default":
+            -ext $output_file_extension
+        #end if
+
+    ]]></command>
+    <inputs>
+	    <param name="input_fasta" type="data" format="fasta,fasta.gz" label="Fasta file"/>
+	    <param name="filename_format" type="select" label="Output filename format" help="Default: count">
+            	<option value="count" selected="True">sequential count</option>
+            	<option value="seq_id">Use Sequence Header from the input fasta file</option>
+	    </param>
+	    <param name="output_file_extension" type="select" label="Output file extension" help="Default: fasta">
+		<option value="default" selected="True">default</option>
+                <option value="fa" >fa</option>
+                <option value="seq">seq</option>
+            </param>
+    </inputs>
+    <outputs>
+        <!-- every file found in contigs/ becomes one fasta element, named after the file -->
+        <collection name="fasta_collection" type="list" label="${tool.name} on ${on_string}: split sequences">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="contigs" format="fasta"/>
+        </collection>
+    </outputs>
+	
+    <tests>
+	 <test>
+            <!-- #1 test -->
+        </test>
+    </tests>
+    <help><![CDATA[
+	    
+.. class:: infomark
+
+**What it does**
+	    
+*FGENESH split_multi_fasta.pl splits a multi-FASTA file into separate FASTA files*
+
+**Input**
+
+- input file  -  Genome or de novo assembly file in FASTA format
+
+**Command line Example:**
+
+- split_multi_fasta.pl input.fasta -name count -dir contigs/ -ext fa
+
+- input.fasta - the FASTA file to split
+
+- name - naming scheme of the output files (count or seq_id)
+
+- dir - output folder for the split files
+
+- ext - extension of the output files (optional)
+
+
+**Parameters:**
+
+- Fasta file - the FASTA file to split
+
+- Output filename format - sequential count (default) or the sequence header from the input fasta file
+
+- Output file extension - default, fa or seq; with default the -ext option is not passed to split_multi_fasta.pl
+
+**Output**
+
+- collection - a list collection holding every file written to the output folder as a fasta dataset
+
+**Typical use**
+
+- the resulting collection can be supplied as the "Multiple sequences" input of FGENESH annotate
+
+- the same collection can be supplied as the "Input fasta file" collection of FGENESH to Genbank
+
+ 
+.. class:: infomark
+
+**Contributor**
+
+Galaxy Australia wrapped the Fgenesh: the original software is available on this website `FGENESH <http://www.softberry.com/berry.phtml?topic=fgenesh_plus_plus&group=help&subgroup=pipelines>`_. Note: The software license needs to be acquired prior to use.
+
+	    
+	]]></help>
+</tool>
diff --git a/tools/fgenesh/fgenesh_to_genbank.xml b/tools/fgenesh/fgenesh_to_genbank.xml
new file mode 100644
index 00000000..493da03d
--- /dev/null
+++ b/tools/fgenesh/fgenesh_to_genbank.xml
@@ -0,0 +1,105 @@
+<tool id="fgenesh_to_genbank" name="FGENESH to Genbank" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" >
+    <description>create genbank file</description>
+    <macros>
+	  <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command><![CDATA[
+        mkdir -p output_dir resn3_in seq_in &&
+        #for $resn3_file in $resn3_collection
+            ln -s '$resn3_file' resn3_in/'${resn3_file.element_identifier}' &&
+            echo resn3_in/'${resn3_file.element_identifier}' >> resn3.list &&
+        #end for
+        ## resn3 and fasta links live in separate folders so that equal element identifiers in the two collections cannot collide
+        sort resn3.list > resn3_sorted.list &&
+
+        #for $seq_file in $sequence_collection
+            ln -s '$seq_file' seq_in/'${seq_file.element_identifier}' &&
+            echo seq_in/'${seq_file.element_identifier}' >> seq.list &&
+        #end for
+        ## each list is sorted on its own, so the two collections must sort into matching order
+        sort seq.list > seq_sorted.list &&
+
+        ln -s '$genbank_header' header &&
+
+        run_fgenesh_2_genbank.pl $polya $skip_empty
+        -div:'$div'
+        -org_code:'$organism_code'
+        -method:'$method'
+        header
+        resn3_sorted.list
+        seq_sorted.list
+        output_dir/
+        2>&1
+
+    ]]></command>
+    <inputs>
+	    <param name="genbank_header" format="genbank" type="data"  label="Genbank header file"/>
+	    <param name="resn3_collection" format="txt" type="data_collection" collection_type="list" label="Input resn3 file" help="list of files with predictions in Fgenesh/Fgenesh++ format"/>
+	    <param name="sequence_collection" format="fasta" type="data_collection" collection_type="list" label="Input fasta file"  help="list of files with sequences in FASTA format"/>
+		    <!--<param argument="-tata" type="boolean" truevalue="-taxa" falsevalue="" checked="false" label="predict TATA boxes" help="annotate predicted TATA boxes" />-->
+	    <param argument="-polya" type="boolean" truevalue="-polya" falsevalue="" checked="false" label="predict PolyA signals" help="annotate predicted PolyA signals" />
+	    <param argument="-skip_empty" type="boolean" truevalue="-skip_empty" falsevalue="" checked="true" label="Discard empty prediction" help="do not append records for sequences with no predictions" />
+    	    <param name="div" type="text" value="PRI" label="GenBank division (PRI by default)"/>
+	    <param name="organism_code" type="text" value="GN" label="prefix for gene names" help="2-letter organism code used as prefix for gene names. (i.e HS - Homo sapiens)" />
+	    <param name="method" type="text" value="Fgenesh" label="program used for gene prediction" help="Fgenesh by default"/>
+    </inputs>
+    <outputs>
+        <collection name="output_genbank" type="list" label="${tool.name} on ${on_string}: genbank"><!-- one element per *.gb file found in output_dir/ -->
+            <discover_datasets pattern="(?P&lt;name&gt;.*)\.gb$" format="genbank" directory="output_dir"/>
+        </collection>
+        <!--<data name="log" format="txt" label="${tool.name} on ${on_string}: Report"/>-->
+    </outputs>
+	
+    <tests>
+	 <test>
+            <!-- #1 test -->
+        </test>
+    </tests>
+    <help><![CDATA[
+	    
+.. class:: infomark
+
+**What it does**
+	    
+*Fgenesh run_fgenesh_2_genbank.pl is a tool to convert Fgenesh annotation file (resn3) and sequence fasta file to genbank files*
+
+**Input**
+	    
+- resn3 file  -  a list of input files with Fgenesh/Fgenesh++ predictions  
+
+- sequence file - a list of fasta files with genomic FASTA sequences 
+
+**Command line Example:**
+
+- run_fgenesh_2_genbank.pl [options] <header_file> <res_files_list> <seq_files_list> <gb_files_dir>
+
+- header file - file with additional comments (use Genbank keywords)
+
+- res_files_list - a list of resn3 files (i.e a collection of resn3 files in the Galaxy history)
+
+- seq_files_list - a list of corresponding fasta files to the input resn3 files in res_files_list (i.e a collection of fasta files in the Galaxy history)
+
+- option
+
+- tata  - annotate predicted TATA boxes (not implemented in this wrapper due to unexpected bug)  
+- polya - annotate predicted PolyA signals
+- div:<DIV> - GenBank division (PRI by default)
+- org_code:<NN> - 2-letter organism code used as prefix for gene names, for example: HS - Homo sapiens, PG - Punica granatum (Pomegranate), EG - Elaeis guineensis, GN is used by default if no other code is provided
+- method:<program> - program used for gene prediction (Fgenesh, Fgenesh++; Fgenesh by default)
+- skip_empty - do not append records for sequences with no predictions
+
+**Output**
+
+- folder - contains a list of genbank files (aka a collection in the Galaxy history)
+
+.. class:: infomark
+
+**Contributor**
+
+Galaxy Australia wrapped the Fgenesh: the original software is available on this website `FGENESH <http://www.softberry.com/berry.phtml?topic=fgenesh_plus_plus&group=help&subgroup=pipelines>`_. Note: The software license needs to be acquired prior to use.
+
+	    
+	]]></help>
+</tool>
diff --git a/tools/fgenesh/macros.xml b/tools/fgenesh/macros.xml
index ec455c7e..198dc4fc 100644
--- a/tools/fgenesh/macros.xml
+++ b/tools/fgenesh/macros.xml
@@ -6,8 +6,7 @@
     <token name="@DIGEST@">3e414082c1a12393ab10b1bc4e22de540397fef626840945824a76f6d62def6b</token>
     <xml name="requirements">
         <requirements>
-	      <container type="docker">wthang/genomeannotation:latest</container>
-	      <requirement type="package" version="@VERSION@">blast</requirement>
+	      <container type="docker">wthang/genomepannotation:v2</container>
         </requirements>
     </xml>
     <xml name="stdio">