-
Notifications
You must be signed in to change notification settings - Fork 1
/
final-workflow-short.yml
177 lines (160 loc) · 8.59 KB
/
final-workflow-short.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#An example yml file for a specific organism
#The full paths look like this:
#For genomic fasta/scaffold data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_genomic_fasta]
#For gene set /analysis data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_genomic_fasta]/[deepPATH_analyses]/$(inputs.tree[2])
#For bigwig data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_bigwig]
#For jbrowse/apollo data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_apollo2_data]
#For intermediate files:
#$(inputs.PATH[1])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_genomic_fasta]/[deepPATH_analyses]/$(inputs.tree[2])
#For BLAT data:
#$(inputs.PATH[2])
#-------------------------------------------------------------------------------
# Base directories, referenced by index in the path templates:
#   PATH[0] - organism data, PATH[1] - intermediate files, PATH[2] - BLAT data.
# Don't change this unless you know what you're doing.
PATH:
  - /app/data/other_species
  - /app/data/working_files
  - /app/data/blat/db
#-------------------------------------------------------------------------------
# Path of manage.py - don't change this unless you know what you're doing.
managePy_Path: /usr/local/i5k/
# Path of blast/db (stage and production).
blastdb_Path_stage:
  - /usr/local/i5k/media/blast/db
blastdb_Path_production:
  - /usr/local/i5k/media/blast/db
# Path of hmmer/db (stage and production).
hmmerdb_Path_stage:
  - /usr/local/i5k/media/hmmer/db
hmmerdb_Path_production:
  - /usr/local/i5k/media/hmmer/db
#-------------------------------------------------------------------------------
# Directory-tree components, referenced by index as tree[N]:
#   [0] species abbreviation - format gggsss
#   [1] the assembly name
#   [2] the gene set/annotation name
#   [3] the functional annotation directory name, which should be in the
#       analyses/ folder. Use 'NA' if not applicable.
# Descriptions are kept up here rather than inline: a '#' that is not preceded
# by whitespace is NOT a comment in YAML and silently becomes part of the value.
tree:
  - saceub
  - SEUB3.0
  - NCBI Saccharomyces eubayanus Annotation Release 100
  - NCBI Saccharomyces eubayanus Annotation Release 100 functional annotation
# Genus and species of the organism you are onboarding. If the organism also
# has a subspecies name, include it in the species entry, separated from the
# species name by an underscore, e.g. 'grandis_grandis' for
# Anthonomus grandis sp. grandis.
scientific_name:
  - Saccharomyces  # genus
  - eubayanus  # species
# Genomic fasta name and URL. The path field will be ignored if there is a
# valid URL. If local data is used, set the URL to 'NA' and put the local file
# path in the path field. The file needs to be unzipped. With singularity, the
# local file path needs to be in the mounted /work-dir volume.
genome_fasta_name:
  - GCF_001298625.1_SEUB3.0_genomic.fna
url_genomic_fasta:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_genomic.fna.gz
# 'class' and 'path' must be nested under the key so they form one File object
# instead of becoming stray top-level keys.
path_genomic_fasta:
  class: File
  path: /app/data/GCF_001298625.1_SEUB3.0_genomic.fna
# Protein fasta name and URL. The path field will be ignored if there is a
# valid URL. If local data is used, set the URL to 'NA' and put the local file
# path in the path field. The file needs to be unzipped. With singularity, the
# local file path needs to be in the mounted /work-dir volume.
protein_fasta_name:
  - GCF_001298625.1_SEUB3.0_translated_cds.faa
url_protein_fasta:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_translated_cds.faa.gz
# 'class' and 'path' must be nested under the key so they form one File object
# instead of becoming stray top-level keys.
path_protein_fasta:
  class: File
  path: /app/data/GCF_001298625.1_SEUB3.0_translated_cds.faa
# Transcript fasta name and URL. The path field will be ignored if there is a
# valid URL. If local data is used, set the URL to 'NA' and put the local file
# path in the path field. The file needs to be unzipped. With singularity, the
# local file path needs to be in the mounted /work-dir volume.
transcript_fasta_name:
  - GCF_001298625.1_SEUB3.0_rna_from_genomic.fna
url_transcript_fasta:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_rna_from_genomic.fna.gz
# 'class' and 'path' must be nested under the key so they form one File object
# instead of becoming stray top-level keys.
path_transcript_fasta:
  class: File
  path: /app/data/GCF_001298625.1_SEUB3.0_rna_from_genomic.fna
# CDS fasta name and URL. The path field will be ignored if there is a valid
# URL. If local data is used, set the URL to 'NA' and put the local file path
# in the path field. The file needs to be unzipped. With singularity, the
# local file path needs to be in the mounted /work-dir volume.
cds_fasta_name:
  - GCF_001298625.1_SEUB3.0_cds_from_genomic.fna
url_cds_fasta:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_cds_from_genomic.fna.gz
# 'class' and 'path' must be nested under the key so they form one File object
# instead of becoming stray top-level keys.
path_cds_fasta:
  class: File
  path: /app/data/GCF_001298625.1_SEUB3.0_cds_from_genomic.fna
# GFF name and URL. The path field will be ignored if there is a valid URL.
# If local data is used, set the URL to 'NA' and put the local file path in
# the path field. The file needs to be unzipped. With singularity, the local
# file path needs to be in the mounted /work-dir volume.
# Keys are listed name / url / path, matching the fasta stanzas in this file.
gff_name:
  - GCF_001298625.1_SEUB3.0_genomic.gff
url_genomic_gff:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_genomic.gff.gz
# 'class' and 'path' must be nested under the key so they form one File object
# instead of becoming stray top-level keys.
path_genomic_gff:
  class: File
  path: /app/data/GCF_001298625.1_SEUB3.0_genomic.gff
#release name or number of the annotation file. For NCBI's older numeric format (e.g. 100), use single quotes around the number so that the workflow accepts it. Strings (not integers) don't need single quotes.
#NOTE(review): the accession in the value below (GCF_027563975.1) differs from the GCF_001298625.1 assembly used by every URL in this file, and the annotation name in 'tree' is "Annotation Release 100" - confirm this value was not carried over from another organism.
gff_release_number: GCF_027563975.1-RS_2023_02
# NCBI feature table file URL. If no functional annotation is available, set
# the URL to 'NA'.
# Example url table file:
#   https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_feature_table.txt.gz
url_table_file:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_feature_table.txt.gz
# GO file path. This path will be ignored if the table file URL is 'NA'. The
# file needs to be unzipped. With singularity, the local file path needs to be
# in the mounted /work-dir volume.
# 'class' and 'path' must be nested under the key so they form one File object
# instead of becoming stray top-level keys.
# NOTE(review): this is a relative path, unlike the absolute /app/data/... paths
# of the other File inputs in this file - confirm where it is resolved from.
path_GO:
  class: File
  path: NCBI Saccharomyces eubayanus Annotation Release 100 functional annotation/GCF_001298625.1_complete.gaf.tsv
# KEGG file path. This path will be ignored if the table file URL is 'NA'. The
# file needs to be unzipped. With singularity, the local file path needs to be
# in the mounted /work-dir volume.
# 'class' and 'path' must be nested under the key so they form one File object
# instead of becoming stray top-level keys.
# NOTE(review): this is a relative path, unlike the absolute /app/data/... paths
# of the other File inputs in this file - confirm where it is resolved from.
path_KEGG:
  class: File
  path: NCBI Saccharomyces eubayanus Annotation Release 100 functional annotation/KOBAS/GCF_001298625.1_KOBAS_acc_pathways.tsv
#-------------------------------------------------------------------------------
# URL of the md5checksums.txt file in the assembly's NCBI FTP directory.
url_md5checksums:
  - https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/md5checksums.txt
#-------------------------------------------------------------------------------
# The containing directory for the assembly. Don't change this unless you have to.
deepPATH_genomic_fasta:
  - scaffold
#-------------------------------------------------------------------------------
# This section is usually for the official gene set.
# The containing directory for ALL analyses, not just the gene set loaded here.
# Don't change this unless you have to.
deepPATH_analyses:
  - analyses
#-------------------------------------------------------------------------------
# The containing directory for Jbrowse/Apollo data. Don't change this unless you have to.
deepPATH_apollo2_data:
  - jbrowse/data
# The containing directory for bigwig files generated from the assembly.
# Don't change this unless you have to.
deepPATH_bigwig:
  - scaffold/bigwig
#-------------------------------------------------------------------------------
# Apollo2 server URLs. Note that the host for the apollo-content docker
# container is https://localhost:8080
# Stage: where apollo2-stage-node1 lives.
host:
  - https://apollo2-stage-node1.nal.usda.gov/apollo
# Production.
# NOTE(review): this URL uses http while the stage URL uses https - confirm
# that the difference is intended.
host_production:
  - http://apollo2-node1.nal.usda.gov/apollo
# Authentication in apollo2.
# NOTE(review): 'password' reads like a placeholder - supply the real
# credentials at deploy time rather than committing them to version control.
# apollo2-stage login
login_apollo2:
  - password
# apollo2 production login
login_apollo2_production:
  - password
#-------------------------------------------------------------------------------
#Apollo server
#username
#NOTE(review): the three values below read '[email protected]', which looks like a redacted/obfuscated address rather than a real account name. As written, YAML parses each value as a one-element flow sequence, not a string - replace each with the real address (quoted) before use.
Apollo_account: [email protected]
Gmod_account: [email protected]
Gmod_stage_account: [email protected]
#-------------------------------------------------------------------------------
# Symlink PATH - don't change this unless you know what you're doing.
MAIN_PATH: /app/i5k-tripal/wwwroot/data/Arthropoda
#-------------------------------------------------------------------------------
# Scaffold readme information.
organization: NCBI
# Change the text below if there is a publication link in the github ticket.
link_to_publication: 'Not published, please follow Ft. Lauderdale/Toronto guidelines for data reuse'
#Below are variables for the django setup, TBD (not implemented yet)