-
Notifications
You must be signed in to change notification settings - Fork 1
/
final-workflow.yml
138 lines (134 loc) · 5.71 KB
/
final-workflow.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#Example yml file for a specific organism
#The full paths look like this:
#For genomic fasta/scaffold data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_genomic_fasta]
#For gene set/analysis data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_genomic_fasta]/[deepPATH_analyses]/$(inputs.tree[2])
#For bigwig data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_bigwig]
#For jbrowse/apollo data:
#$(inputs.PATH[0])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_apollo2_data]
#For intermediate files:
#$(inputs.PATH[1])/$(inputs.tree[0])/$(inputs.tree[1])/[deepPATH_genomic_fasta]/[deepPATH_analyses]/$(inputs.tree[2])
#For BLAT data:
#$(inputs.PATH[2])
#-------------------------------------------------------------------------------
# Base directories, referenced by index as $(inputs.PATH[n]).
# Don't change these unless you know what you're doing.
PATH:
  # PATH[0]: genomic fasta, gene set/analysis, bigwig and jbrowse/apollo data
  - /app/data/other_species
  # PATH[1]: intermediate files
  - /app/data/working_files
  # PATH[2]: BLAT data
  - /app/data/blat/db
#-------------------------------------------------------------------------------
# Path of manage.py - don't change this unless you know what you're doing.
managePy_Path: /usr/local/i5k/
# Path of blast/db (one-element lists, stage and production).
blastdb_Path_stage:
  - /usr/local/i5k/media/blast/db
blastdb_Path_production:
  - /usr/local/i5k/media/blast/db
# Path of hmmer/db (one-element lists, stage and production).
hmmerdb_Path_stage:
  - /usr/local/i5k/media/hmmer/db
hmmerdb_Path_production:
  - /usr/local/i5k/media/hmmer/db
#-------------------------------------------------------------------------------
# Directory-tree components, referenced by index as $(inputs.tree[n]).
tree:
  # tree[0]: species abbreviation - format gggsss
  - apimel
  # tree[1]: the assembly name
  - Amel_HAv3.1
  # tree[2]: the gene set/annotation name
  - Apis_mellifera_Annotation_Release_103
  # tree[3]: the functional annotation directory name, which should be in the
  # analyses/ folder. Use NA if not applicable.
  - NA
scientific_name:
  # genus
  - Apis
  # species
  - mellifera
# Genomic fasta name and URL
genome_fasta_name:
  # The fasta file name. This is needed for moving the related .2bit file
  # in the production workflow.
  - GCF_003254395.2_Amel_HAv3.1_genomic.fna
url_genomic_fasta:
  - ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/254/395/GCF_003254395.2_Amel_HAv3.1/GCF_003254395.2_Amel_HAv3.1_genomic.fna.gz
# Protein fasta name(s) and URL(s)
protein_fasta_name:
  # If you have more than one protein fasta file, add one "- <file name>"
  # entry per file, as in the commented-out examples below.
  - GCF_003254395.2_Amel_HAv3.1_translated_cds.faa
  # - GCF_003254395.2_Amel_HAv3.1_translated_cds.faa2
  # - GCF_003254395.2_Amel_HAv3.1_translated_cds.faa3
url_protein_fasta:
  - ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/254/395/GCF_003254395.2_Amel_HAv3.1/GCF_003254395.2_Amel_HAv3.1_translated_cds.faa.gz
# Transcript fasta name and URL
transcript_fasta_name:
  - GCF_003254395.2_Amel_HAv3.1_rna_from_genomic.fna
url_transcript_fasta:
  - ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/254/395/GCF_003254395.2_Amel_HAv3.1/GCF_003254395.2_Amel_HAv3.1_rna_from_genomic.fna.gz
# CDS fasta name and URL
cds_fasta_name:
  - GCF_003254395.2_Amel_HAv3.1_cds_from_genomic.fna
url_cds_fasta:
  - ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/254/395/GCF_003254395.2_Amel_HAv3.1/GCF_003254395.2_Amel_HAv3.1_cds_from_genomic.fna.gz
# GFF name and URL
url_genomic_gff:
  - ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/254/395/GCF_003254395.2_Amel_HAv3.1/GCF_003254395.2_Amel_HAv3.1_genomic.gff.gz
gff_name:
  - GCF_003254395.2_Amel_HAv3.1_genomic.gff
# NOTE(review): the annotation name given under "tree" in this file ends in
# Release_103, while this value is 101 - confirm which release number is intended.
gff_release_number: 101
#-------------------------------------------------------------------------------
# URL of the md5checksums.txt file published alongside the downloads.
url_md5checksums:
  - ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/003/254/395/GCF_003254395.2_Amel_HAv3.1/md5checksums.txt
#-------------------------------------------------------------------------------
# The containing directory for the assembly.
# Don't change this unless you have to.
deepPATH_genomic_fasta:
  - scaffold
#-------------------------------------------------------------------------------
# This section is usually for the official gene set.
# The containing directory for ALL analyses, not just the gene set loaded here.
# Don't change this unless you have to.
deepPATH_analyses:
  - analyses
#-------------------------------------------------------------------------------
# The containing directory for Jbrowse/Apollo data.
# Don't change this unless you have to.
deepPATH_apollo2_data:
  - jbrowse/data
# The containing directory for bigwig files generated from the assembly.
# Don't change this unless you have to.
deepPATH_bigwig:
  - scaffold/bigwig
#-------------------------------------------------------------------------------
# Where apollo2-stage-node1 lives
host:
  - https://apollo2-stage-node1.nal.usda.gov/apollo
# Where apollo2-node1 lives
host_production:
  - https://apollo.nal.usda.gov/apollo
# Authentication in apollo2.
# NOTE(review): "password" below looks like a placeholder - avoid committing
# real credentials to version control.
# apollo2-stage login
login_apollo2:
  - password
# apollo2 production login
login_apollo2_production:
  - password
#-------------------------------------------------------------------------------
#Apollo server
#username
# NOTE(review): each value below reads "[email protected]", which looks like a
# redacted e-mail placeholder rather than a real account name. As written, YAML
# parses it as a one-element flow sequence (["email protected"]), not a plain
# string - replace with the real account addresses before use.
Apollo_account: [email protected]
Gmod_account: [email protected]
Gmod_stage_account: [email protected]
#-------------------------------------------------------------------------------
# Symlink PATH - don't change this unless you know what you're doing.
MAIN_PATH: '/app/i5k-tripal/wwwroot/data/Arthropoda'
#-------------------------------------------------------------------------------
# Scaffold readme information
organization: 'NCBI'
# The assembly accession number, but without any underscores or periods.
# E.g. GCF_003254395.2 converts into GCF0032543952
accession: 'GCF0032543952'
# Change the text if there is a publication link in the github ticket.
link_to_publication: "Not published, please follow Ft. Lauderdale/Toronto guidelines for data reuse"
# The variables below are for django setup, TBD (not implemented yet).