#final-workflow-short.yml

#An example yml file for a specific organism

#The full path look like this:
#For genomic fasta/scaffold data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_genomic_fasta]
#For gene set /analysis data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_genomic_fasta]/[deepPATH_analyses]/$(inputs.tree[2])
#For bigwig data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_bigwig]
#For jbrowse/apollo data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_apollo2_data]
#For intermediate files: 
#$(inputs.PATH[1])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_genomic_fasta]/[deepPATH_analyses]/$(inputs.tree[2])
#For BLAT data:
#$(inputs.PATH[2])
#-------------------------------------------------------------------------------
# Base directories, addressed by index in the path templates at the top of this file.
# Don't change these unless you know what you're doing.
PATH:
  # PATH[0]: genomic fasta/scaffold, gene set/analysis, bigwig and jbrowse/apollo data
  - /app/data/other_species
  # PATH[1]: intermediate files
  - /app/data/working_files
  # PATH[2]: BLAT data
  - /app/data/blat/db
#-------------------------------------------------------------------------------
# Path of manage.py - don't change this unless you know what you're doing.
managePy_Path: /usr/local/i5k/
# Path of blast/db, for stage and for production.
blastdb_Path_stage:
  - /usr/local/i5k/media/blast/db
blastdb_Path_production:
  - /usr/local/i5k/media/blast/db
# Path of hmmer/db, for stage and for production.
hmmerdb_Path_stage:
  - /usr/local/i5k/media/hmmer/db
hmmerdb_Path_production:
  - /usr/local/i5k/media/hmmer/db
#-------------------------------------------------------------------------------
# Directory-name components that are combined with PATH in the path templates
# at the top of this file. The list must contain exactly these four entries.
tree:
  # tree[0]: species abbreviation - format gggsss
  - saceub
  # tree[1]: the assembly name
  - SEUB3.0
  # tree[2]: the gene set/annotation name
  - NCBI Saccharomyces eubayanus Annotation Release 100
  # The functional annotation directory name, which should be in the analyses/
  # folder. Add 'NA' if not applicable.
  - NCBI Saccharomyces eubayanus Annotation Release 100 functional annotation
# Genus and species of the organism you are onboarding.
# If the organism also has a subspecies name, include it in the species entry,
# separated from the species name by an underscore,
# e.g. 'grandis_grandis' for Anthonomus grandis sp. grandis.
scientific_name:
  # genus
  - Saccharomyces
  # species
  - eubayanus

# Genomic fasta: file name, download URL and local path.
# The path field is ignored if there is a valid URL. To use local data, set the
# URL field to 'NA' and put the local file path in the path field.
# The file needs to be unzipped. With singularity, the local file path needs to
# be in the mounted /work-dir volume.
genome_fasta_name:
  - GCF_001298625.1_SEUB3.0_genomic.fna
url_genomic_fasta:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_genomic.fna.gz
path_genomic_fasta:
  class: File
  path: /app/data/GCF_001298625.1_SEUB3.0_genomic.fna

# Protein fasta: file name, download URL and local path.
# The path field is ignored if there is a valid URL. To use local data, set the
# URL field to 'NA' and put the local file path in the path field.
# The file needs to be unzipped. With singularity, the local file path needs to
# be in the mounted /work-dir volume.
protein_fasta_name:
  - GCF_001298625.1_SEUB3.0_translated_cds.faa
url_protein_fasta:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_translated_cds.faa.gz
path_protein_fasta:
  class: File
  path: /app/data/GCF_001298625.1_SEUB3.0_translated_cds.faa

# Transcript fasta: file name, download URL and local path.
# The path field is ignored if there is a valid URL. To use local data, set the
# URL field to 'NA' and put the local file path in the path field.
# The file needs to be unzipped. With singularity, the local file path needs to
# be in the mounted /work-dir volume.
transcript_fasta_name:
  - GCF_001298625.1_SEUB3.0_rna_from_genomic.fna
url_transcript_fasta:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_rna_from_genomic.fna.gz
path_transcript_fasta:
  class: File
  path: /app/data/GCF_001298625.1_SEUB3.0_rna_from_genomic.fna

# CDS fasta: file name, download URL and local path.
# The path field is ignored if there is a valid URL. To use local data, set the
# URL field to 'NA' and put the local file path in the path field.
# The file needs to be unzipped. With singularity, the local file path needs to
# be in the mounted /work-dir volume.
cds_fasta_name:
  - GCF_001298625.1_SEUB3.0_cds_from_genomic.fna
url_cds_fasta:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_cds_from_genomic.fna.gz
path_cds_fasta:
  class: File
  path: /app/data/GCF_001298625.1_SEUB3.0_cds_from_genomic.fna

# GFF: file name, download URL and local path.
# The path field is ignored if there is a valid URL. To use local data, set the
# URL field to 'NA' and put the local file path in the path field.
# The file needs to be unzipped. With singularity, the local file path needs to
# be in the mounted /work-dir volume.
gff_name:
  - GCF_001298625.1_SEUB3.0_genomic.gff
url_genomic_gff:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_genomic.gff.gz
path_genomic_gff:
  class: File
  path: /app/data/GCF_001298625.1_SEUB3.0_genomic.gff

# Release name or number of the annotation file. For NCBI's older numeric
# format (e.g. 100), use single quotes around the number so that the workflow
# accepts it. Strings (not integers) don't need single quotes.
# '100' corresponds to the gene set named in the tree list
# ("NCBI Saccharomyces eubayanus Annotation Release 100").
# NOTE(review): the previous value, GCF_027563975.1-RS_2023_02, carried the
# accession of a different assembly than GCF_001298625.1 - confirm '100'.
gff_release_number: '100'

# NCBI table file URL. If no functional annotation is available, set the URL
# entry to 'NA'.
url_table_file:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_feature_table.txt.gz

# Example url table file: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_feature_table.txt.gz

# GO file path. Ignored if the table file URL is 'NA'.
# The file needs to be unzipped. With singularity, the local file path needs to
# be in the mounted /work-dir volume.
path_GO:
  class: File
  path: 'NCBI Saccharomyces eubayanus Annotation Release 100 functional annotation/GCF_001298625.1_complete.gaf.tsv'

# KEGG file path. Ignored if the table file URL is 'NA'.
# The file needs to be unzipped. With singularity, the local file path needs to
# be in the mounted /work-dir volume.
path_KEGG:
  class: File
  path: 'NCBI Saccharomyces eubayanus Annotation Release 100 functional annotation/KOBAS/GCF_001298625.1_KOBAS_acc_pathways.tsv'

#-------------------------------------------------------------------------------
# URL of the md5checksums.txt file in the same NCBI FTP directory as the data URLs.
url_md5checksums:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/md5checksums.txt
#-------------------------------------------------------------------------------
# The containing directory for the assembly. Don't change this unless you have to.
deepPATH_genomic_fasta:
  - scaffold

#-------------------------------------------------------------------------------
# This section is usually for the official gene set.
# The containing directory for ALL analyses, not just the gene set loaded here.
# Don't change this unless you have to.
deepPATH_analyses:
  - analyses

#-------------------------------------------------------------------------------
# The containing directory for Jbrowse/Apollo data. Don't change this unless you have to.
deepPATH_apollo2_data:
  - jbrowse/data
# The containing directory for bigwig files generated from the assembly.
# Don't change this unless you have to.
deepPATH_bigwig:
  - scaffold/bigwig
#-------------------------------------------------------------------------------
# Where apollo2-stage-node1 lives. Note that the host for the apollo-content
# docker container is https://localhost:8080
host:
  - https://apollo2-stage-node1.nal.usda.gov/apollo
# Production Apollo2 host.
# NOTE(review): this URL uses http while the stage host uses https - confirm
# that is intended.
host_production:
  - http://apollo2-node1.nal.usda.gov/apollo
# Authentication in apollo2: each login list is [user, password].
# apollo2-stage login
login_apollo2:
  - admin@local.host
  - password
# apollo2 production login
login_apollo2_production:
  - user@ars.usda.gov
  - password
#-------------------------------------------------------------------------------
# Apollo server and Gmod (production and stage) accounts, written as username@host.
Apollo_account: 'apollo@apollo2-node1.nal.usda.gov'
Gmod_account: 'i5k@i5k-node1.nal.usda.gov'
Gmod_stage_account: 'i5k@i5k-stage-node1.nal.usda.gov'

#-------------------------------------------------------------------------------
# Symlink PATH.
# Don't change this unless you know what you're doing.
MAIN_PATH: '/app/i5k-tripal/wwwroot/data/Arthropoda'
#-------------------------------------------------------------------------------
# Information for the scaffold readme.
organization: 'NCBI'
# Change the text if there is a publication link in the github ticket.
link_to_publication: "Not published, please follow Ft. Lauderdale/Toronto guidelines for data reuse"
#below are variables for django setup, TBD (not implemented yet)