-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathuser.yml
121 lines (120 loc) · 3.31 KB
/
user.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# BLAST options and task
# Updated for BLAST+ version 2.2.30+.
#
# engine: options currently implemented are:
#   - tblastn
#   - blastn
# The default in user.yml is tblastn.
engine: tblastn
#
#
# separate_db: separate each transcriptome into an individual folder.
# This will create many folders in the output directory,
# each ending with the database name.
separate_db: true
#
#
# use_threads: number of threads to use.
# Only valid together with the separate_db option.
use_threads: 2
#
# debug: settings for the debugging output.
debug:
  # file: path to the debugging file; set it relative to user.yml
  file: output/log.txt
#
#
# opts: command-line options, which are engine specific. The defaults in
# user.yml are for tblastn.
#
# Examples of valid options (as of the writing of this file):
#   - for tblastn (default in user.yml):
#       opts: "-max_target_seqs 500 -evalue 1E-20"
#   - for blastn:
#       opts: "-dust no -max_target_seqs 500 -evalue 1E-20"
opts: "-max_target_seqs 500 -evalue 1E-20"
#
#
#
# identity: thresholds used to filter out results by identity.
# Values range between 0 and 1 (i.e. 0% and 100%).
identity:
  # min: lower bound of the identity range
  min: 0.40
  # max: upper bound of the identity range
  max: 1
#
#
# Filtering options
#
prune_identical:
  # use_worst: picks the result with the highest identity if false
  # and the one with the lowest identity if true
  use_worst: false
  # first: the initial filtering step removes all results with an identical
  # sseqid and keeps the best/worst one (depending on the use_worst option)
  first: sseqid
  # list: subsequent filtering columns; each works the same way as first
  list:
    - nt_aligned_seq
#
#
# annotation_dir: directory where the csv files used to annotate the
# results are kept.
# Each csv should have at least 3 columns, with headers.
# The csv columns must be separated by tabs (\t).
#
# columns 1 & 2: the headers should match columns in the trimmed.csv results;
# each line that matches gets the following columns added to it
#
# columns 3 and on: the header becomes the header of the new column, and the
# information in these columns is added to the matched elements
#
# example:
# db          sseqid     expression_level
# blackberry  comp20_u1  23
# blackberry  comp33_u1  33
# bilberry    comp33_u1  42
# ....
annotation_dir: "db_and_queries/annotation"
#
# db: directory that holds all database files in blastdb format.
# If you need to convert a fasta file to this format, you
# can use the tool from blast+:
#   $ makeblastdb
# or put the files in db_and_queries/import_db and run
# the script that is in that folder.
db:
  parent: db_and_queries/db
  # list: if left empty, all blast dbs in parent that have
  # a .nhr extension are used.
  # NOTE(review): a bare "list:" parses as null, not as an empty list;
  # presumably the consumer treats null as "empty" - confirm before
  # changing it to [].
  list:
#
# query: directories that hold all queries in the form of fasta files.
# All query files must have one of these extensions:
#   .fasta .query .fas .fna
query:
  parent: db_and_queries
  # folders: multiple folders holding queries are supported by
  # adding new lines to this option.
  # note: every new line must follow the indentation and start
  # with a dash '-' (without quotes)
  folders:
    - queries
#
# orf: configuration for finding the longest ORF
# TODO:
# - max: maximum size of sequence, Inf
orf:
  # stop_codon: sequences that indicate where an ORF ends
  stop_codon:
    - tag
    - taa
    - tga
  # start_codon: sequences that indicate where an ORF begins
  start_codon:
    - atg
  # reverse: search the reverse sequence
  reverse: true
  # direct: search the given sequence
  # NOTE(review): the original note read "at least this and reverse must be
  # used" - presumably at least one of direct/reverse must be true; confirm.
  direct: true
  # min: minimum size of sequence
  min: 120