Skip to content

Commit

Permalink
annotation dir is optional
Browse files Browse the repository at this point in the history
  • Loading branch information
averissimo committed Sep 22, 2016
1 parent 8fa7e54 commit e638c9d
Show file tree
Hide file tree
Showing 12 changed files with 93 additions and 88 deletions.
17 changes: 11 additions & 6 deletions config/config_blast.rb
Original file line number Diff line number Diff line change
Expand Up @@ -251,10 +251,9 @@ def validate_config
' folder')
end
#
# check annotation_dir
unless @store.key?(:annotation_dir)
log_required.call('output', 'path to annotation folder')
end
# annotation_dir is optional, so its presence is not validated here
#

#
# check db
if !@store.key?(:db)
Expand Down Expand Up @@ -324,16 +323,22 @@ def process_config
@store.output.dir = File.expand_path(@store.output.dir, base_dir)
@store.db.parent = File.expand_path(@store.db.parent, base_dir)
@store.query.parent = File.expand_path(@store.query.parent, base_dir)
@store.annotation_dir = File.expand_path(@store.annotation_dir, base_dir)
#
@store.debug.file = File.expand_path(@store.debug.file, base_dir)
#
if @store.key?('annotation_dir') &&
!@store.annotation_dir.nil? &&
@store.annotation_dir != 'nil'
@store.annotation_dir = File.expand_path(@store.annotation_dir, base_dir)
end
#

# check if they exist
did_it_fail = false
{ output_dir: @store.output.dir,
db_parent: @store.db.parent,
query_parent: @store.query.parent,
annotation_dir: @store.annotation_dir }.each do |key, dir|
next if key.to_s == 'annotation_dir' && (dir.nil? || dir == 'nil')
next if Dir.exist? dir
logger.error "Error: Directory for '#{key}' does not exist, please" \
' create it, or change configuration, before running mass blast' \
Expand Down
2 changes: 1 addition & 1 deletion config/default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ query:
# blackberry comp33_u1 33
# bilberry comp33_u1 42
# ....
annotation_dir: "db_and_queries/annotation"
annotation_dir: nil
#
# how to format the output file. by default should show all columns
format:
Expand Down
3 changes: 3 additions & 0 deletions src/reporting.rb
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ def write_fasta
end

def merge_annotation
# guard clause
return nil if !@store.key?(:annotation_dir) || @store.annotation_dir.nil?
#
annot_files = Dir[File.join(@store.annotation_dir, '*.csv')]
annot_files.each_with_index do |file, index|
merge_csv file, index
Expand Down
1 change: 0 additions & 1 deletion test/blastn/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,3 @@ query:
folders:
- query
#
annotation_dir: ../../db_and_queries/annotation
6 changes: 3 additions & 3 deletions test/blastn/result/results.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
engine folder file_name db qseqid evalue pident qcovs sseqid contig_count nt_aligned_seq aa_aligned_seq nt_db_seq aa_db_seq nt_db_longest_orf nt_db_longest_orf_len nt_db_longest_orf_same_size aa_db_longest_orf aa_db_longest_orf_len aa_db_longest_orf_same_size expression_level_annot_0 yada_annot_0 expression_level_annot_1 bla_annot_1
means the engine used means the folder of origin from the query means the query filename means the database of the result means Query Seq-id means Expect value means Percentage of identical matches means Query Coverage Per Subject means Subject Seq-id means number of results for this contig with less identity means nucleotide alignment from db means amino-acid alignment from db means nucleotide full sequence from db means amino-acid full sequence from db means longest nucleotide orf from db in alignment means length of longest nucleotide orf from db in alignment means how many same sized longest nucleotide orf from db in alignment means longest amino-acid orf from db in alignment means length of longest amino-acid orf from db in alignment means how many same sized longest amino-acid orf from db in alignment /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level2.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level2.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level2.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level2.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level2.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level2.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv 
/home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv /home/averissimo/work/research/repos/2016/mass-blast/db_and_queries/annotation/expression_level.csv
tblastn query caffeoyl blackberry_loch_ness 1.2.1.44 0.0 85.743 99 Unigene7060_All 5 agctcttcactttccggccacggccaaactgtgtgtgtcaccggggccggaggcttcatcgcttcttggattgtgaagcttcttctggagagaggctataatgtgagaggaaccttgagaaacccagatgacccgaagaatgctcatttgagagagctggaaggagccacagagaggctgagcttgcgaaaagccgatcttctcgatttcgagagcctcaaagaagccattaacggctgcgatggcgttttccacacagcatcacctgtaactgatgatccggaacaaatggtggagccggcggtgaatggaacaaagaatgtgattgtggcggcatctgaagccaaggttaaacgcgtggtcttcacgtcttcaattggtgctgtctacatggaccccgccaggggtcccgatgtggttgtcgacgagtcctgctggagtgacctcgagttttgcaagaacaccaagaactggtactgctacgggaaagctgtggcggagcaagcagcatgggatgaggccaaagaaaagggggtggacttggtggtggtgaacccagttttggtgcttggaccactgctccaaccaactattaatgccagcattgtccacatcctcaagtacttgactggctcggccaagacttatgccaattcagttcaggcctatgtgcatgttaaggacgtggcattagctcacatactggtgtatgaaactccctctgcctcgggccgttacctttgcgccgagagtgtccttcaccggggagatgtcgttgaaatccttgccaagttcttccctgaatacccaatacccaccaagttgaaagatgatgggaaacccagagcaataccgtacaagttctcaaaccagaagcttcaagacctgggtttggagttcaccccagtgaaacagagcctatatgacactgtcaagagcttgcaggacaagggtcaccttaaagctcctgcaagacaagaagaagactccattaagatccaatcttaa SSSLSGHGQTVCVTGAGGFIASWIVKLLLERGYNVRGTLRNPDDPKNAHLRELEGATERLSLRKADLLDFESLKEAINGCDGVFHTASPVTDDPEQMVEPAVNGTKNVIVAASEAKVKRVVFTSSIGAVYMDPARGPDVVVDESCWSDLEFCKNTKNWYCYGKAVAEQAAWDEAKEKGVDLVVVNPVLVLGPLLQPTINASIVHILKYLTGSAKTYANSVQAYVHVKDVALAHILVYETPSASGRYLCAESVLHRGDVVEILAKFFPEYPIPTKLKDDGKPRAIPYKFSNQKLQDLGLEFTPVKQSLYDTVKSLQDKGHLKAPARQEEDSIKIQS* 
agtctgggaggcctttcccttctccttatataaaccactctcttctttgatatattcattcgtgactcgatcttccattacaatacaaaacttccaattcccactagctacctttaattgtctctttgcagaaacaccaattaacttaagatgcctgctgatcaaagctcttcactttccggccacggccaaactgtgtgtgtcaccggggccggaggcttcatcgcttcttggattgtgaagcttcttctggagagaggctataatgtgagaggaaccttgagaaacccagatgacccgaagaatgctcatttgagagagctggaaggagccacagagaggctgagcttgcgaaaagccgatcttctcgatttcgagagcctcaaagaagccattaacggctgcgatggcgttttccacacagcatcacctgtaactgatgatccggaacaaatggtggagccggcggtgaatggaacaaagaatgtgattgtggcggcatctgaagccaaggttaaacgcgtggtcttcacgtcttcaattggtgctgtctacatggaccccgccaggggtcccgatgtggttgtcgacgagtcctgctggagtgacctcgagttttgcaagaacaccaagaactggtactgctacgggaaagctgtggcggagcaagcagcatgggatgaggccaaagaaaagggggtggacttggtggtggtgaacccagttttggtgcttggaccactgctccaaccaactattaatgccagcattgtccacatcctcaagtacttgactggctcggccaagacttatgccaattcagttcaggcctatgtgcatgttaaggacgtggcattagctcacatactggtgtatgaaactccctctgcctcgggccgttacctttgcgccgagagtgtccttcaccggggagatgtcgttgaaatccttgccaagttcttccctgaatacccaatacccaccaagttgaaagatgatgggaaacccagagcaataccgtacaagttctcaaaccagaagcttcaagacctgggtttggagttcaccccagtgaaacagagcctatatgacactgtcaagagcttgcaggacaagggtcaccttaaagctcctgcaagacaagaagaagactccattaagatccaatcttaattaaagcatattttgggtgctactagccgaaaataattgcaggctcttacctccaaaatttggtagcaatcagaatattaagatttgttaatgaaaggacaatgaaaagttgtgcgagtatttgtatgagtcaaaaccttaacaaatggtttttttatttactaaattctaattgtcatttggttcatatatatatatatagatatatataattctataatcgtatttttaaatttcatgtaatagtgtccacgaaggag SLGGLSLLLI*TTLFFDIFIRDSIFHYNTKLPIPTSYL*LSLCRNTN*LKMPADQSSSLSGHGQTVCVTGAGGFIASWIVKLLLERGYNVRGTLRNPDDPKNAHLRELEGATERLSLRKADLLDFESLKEAINGCDGVFHTASPVTDDPEQMVEPAVNGTKNVIVAASEAKVKRVVFTSSIGAVYMDPARGPDVVVDESCWSDLEFCKNTKNWYCYGKAVAEQAAWDEAKEKGVDLVVVNPVLVLGPLLQPTINASIVHILKYLTGSAKTYANSVQAYVHVKDVALAHILVYETPSASGRYLCAESVLHRGDVVEILAKFFPEYPIPTKLKDDGKPRAIPYKFSNQKLQDLGLEFTPVKQSLYDTVKSLQDKGHLKAPARQEEDSIKIQS*LKHILGATSRK*LQALTSKIW*QSEY*DLLMKGQ*KVVRVFV*VKTLTNGFFIY*ILIVIWFIYIYIDIYNSIIVFLNFM**CPRR 
atgcctgctgatcaaagctcttcactttccggccacggccaaactgtgtgtgtcaccggggccggaggcttcatcgcttcttggattgtgaagcttcttctggagagaggctataatgtgagaggaaccttgagaaacccagatgacccgaagaatgctcatttgagagagctggaaggagccacagagaggctgagcttgcgaaaagccgatcttctcgatttcgagagcctcaaagaagccattaacggctgcgatggcgttttccacacagcatcacctgtaactgatgatccggaacaaatggtggagccggcggtgaatggaacaaagaatgtgattgtggcggcatctgaagccaaggttaaacgcgtggtcttcacgtcttcaattggtgctgtctacatggaccccgccaggggtcccgatgtggttgtcgacgagtcctgctggagtgacctcgagttttgcaagaacaccaagaactggtactgctacgggaaagctgtggcggagcaagcagcatgggatgaggccaaagaaaagggggtggacttggtggtggtgaacccagttttggtgcttggaccactgctccaaccaactattaatgccagcattgtccacatcctcaagtacttgactggctcggccaagacttatgccaattcagttcaggcctatgtgcatgttaaggacgtggcattagctcacatactggtgtatgaaactccctctgcctcgggccgttacctttgcgccgagagtgtccttcaccggggagatgtcgttgaaatccttgccaagttcttccctgaatacccaatacccaccaagttgaaagatgatgggaaacccagagcaataccgtacaagttctcaaaccagaagcttcaagacctgggtttggagttcaccccagtgaaacagagcctatatgacactgtcaagagcttgcaggacaagggtcaccttaaagctcctgcaagacaagaagaagactccattaagatccaatcttaa 1023 0 MPADQSSSLSGHGQTVCVTGAGGFIASWIVKLLLERGYNVRGTLRNPDDPKNAHLRELEGATERLSLRKADLLDFESLKEAINGCDGVFHTASPVTDDPEQMVEPAVNGTKNVIVAASEAKVKRVVFTSSIGAVYMDPARGPDVVVDESCWSDLEFCKNTKNWYCYGKAVAEQAAWDEAKEKGVDLVVVNPVLVLGPLLQPTINASIVHILKYLTGSAKTYANSVQAYVHVKDVALAHILVYETPSASGRYLCAESVLHRGDVVEILAKFFPEYPIPTKLKDDGKPRAIPYKFSNQKLQDLGLEFTPVKQSLYDTVKSLQDKGHLKAPARQEEDSIKIQS* 341 0 "" "" "" ""
engine folder file_name db qseqid evalue pident qcovs sseqid contig_count nt_aligned_seq aa_aligned_seq nt_db_seq aa_db_seq nt_db_longest_orf nt_db_longest_orf_len nt_db_longest_orf_same_size aa_db_longest_orf aa_db_longest_orf_len aa_db_longest_orf_same_size
means the engine used means the folder of origin from the query means the query filename means the database of the result means Query Seq-id means Expect value means Percentage of identical matches means Query Coverage Per Subject means Subject Seq-id means number of results for this contig with less identity means nucleotide alignment from db means amino-acid alignment from db means nucleotide full sequence from db means amino-acid full sequence from db means longest nucleotide orf from db in alignment means length of longest nucleotide orf from db in alignment means how many same sized longest nucleotide orf from db in alignment means longest amino-acid orf from db in alignment means length of longest amino-acid orf from db in alignment means how many same sized longest amino-acid orf from db in alignment
tblastn query caffeoyl blackberry_loch_ness 1.2.1.44 0.0 85.743 99 Unigene7060_All 5 agctcttcactttccggccacggccaaactgtgtgtgtcaccggggccggaggcttcatcgcttcttggattgtgaagcttcttctggagagaggctataatgtgagaggaaccttgagaaacccagatgacccgaagaatgctcatttgagagagctggaaggagccacagagaggctgagcttgcgaaaagccgatcttctcgatttcgagagcctcaaagaagccattaacggctgcgatggcgttttccacacagcatcacctgtaactgatgatccggaacaaatggtggagccggcggtgaatggaacaaagaatgtgattgtggcggcatctgaagccaaggttaaacgcgtggtcttcacgtcttcaattggtgctgtctacatggaccccgccaggggtcccgatgtggttgtcgacgagtcctgctggagtgacctcgagttttgcaagaacaccaagaactggtactgctacgggaaagctgtggcggagcaagcagcatgggatgaggccaaagaaaagggggtggacttggtggtggtgaacccagttttggtgcttggaccactgctccaaccaactattaatgccagcattgtccacatcctcaagtacttgactggctcggccaagacttatgccaattcagttcaggcctatgtgcatgttaaggacgtggcattagctcacatactggtgtatgaaactccctctgcctcgggccgttacctttgcgccgagagtgtccttcaccggggagatgtcgttgaaatccttgccaagttcttccctgaatacccaatacccaccaagttgaaagatgatgggaaacccagagcaataccgtacaagttctcaaaccagaagcttcaagacctgggtttggagttcaccccagtgaaacagagcctatatgacactgtcaagagcttgcaggacaagggtcaccttaaagctcctgcaagacaagaagaagactccattaagatccaatcttaa SSSLSGHGQTVCVTGAGGFIASWIVKLLLERGYNVRGTLRNPDDPKNAHLRELEGATERLSLRKADLLDFESLKEAINGCDGVFHTASPVTDDPEQMVEPAVNGTKNVIVAASEAKVKRVVFTSSIGAVYMDPARGPDVVVDESCWSDLEFCKNTKNWYCYGKAVAEQAAWDEAKEKGVDLVVVNPVLVLGPLLQPTINASIVHILKYLTGSAKTYANSVQAYVHVKDVALAHILVYETPSASGRYLCAESVLHRGDVVEILAKFFPEYPIPTKLKDDGKPRAIPYKFSNQKLQDLGLEFTPVKQSLYDTVKSLQDKGHLKAPARQEEDSIKIQS* 
agtctgggaggcctttcccttctccttatataaaccactctcttctttgatatattcattcgtgactcgatcttccattacaatacaaaacttccaattcccactagctacctttaattgtctctttgcagaaacaccaattaacttaagatgcctgctgatcaaagctcttcactttccggccacggccaaactgtgtgtgtcaccggggccggaggcttcatcgcttcttggattgtgaagcttcttctggagagaggctataatgtgagaggaaccttgagaaacccagatgacccgaagaatgctcatttgagagagctggaaggagccacagagaggctgagcttgcgaaaagccgatcttctcgatttcgagagcctcaaagaagccattaacggctgcgatggcgttttccacacagcatcacctgtaactgatgatccggaacaaatggtggagccggcggtgaatggaacaaagaatgtgattgtggcggcatctgaagccaaggttaaacgcgtggtcttcacgtcttcaattggtgctgtctacatggaccccgccaggggtcccgatgtggttgtcgacgagtcctgctggagtgacctcgagttttgcaagaacaccaagaactggtactgctacgggaaagctgtggcggagcaagcagcatgggatgaggccaaagaaaagggggtggacttggtggtggtgaacccagttttggtgcttggaccactgctccaaccaactattaatgccagcattgtccacatcctcaagtacttgactggctcggccaagacttatgccaattcagttcaggcctatgtgcatgttaaggacgtggcattagctcacatactggtgtatgaaactccctctgcctcgggccgttacctttgcgccgagagtgtccttcaccggggagatgtcgttgaaatccttgccaagttcttccctgaatacccaatacccaccaagttgaaagatgatgggaaacccagagcaataccgtacaagttctcaaaccagaagcttcaagacctgggtttggagttcaccccagtgaaacagagcctatatgacactgtcaagagcttgcaggacaagggtcaccttaaagctcctgcaagacaagaagaagactccattaagatccaatcttaattaaagcatattttgggtgctactagccgaaaataattgcaggctcttacctccaaaatttggtagcaatcagaatattaagatttgttaatgaaaggacaatgaaaagttgtgcgagtatttgtatgagtcaaaaccttaacaaatggtttttttatttactaaattctaattgtcatttggttcatatatatatatatagatatatataattctataatcgtatttttaaatttcatgtaatagtgtccacgaaggag SLGGLSLLLI*TTLFFDIFIRDSIFHYNTKLPIPTSYL*LSLCRNTN*LKMPADQSSSLSGHGQTVCVTGAGGFIASWIVKLLLERGYNVRGTLRNPDDPKNAHLRELEGATERLSLRKADLLDFESLKEAINGCDGVFHTASPVTDDPEQMVEPAVNGTKNVIVAASEAKVKRVVFTSSIGAVYMDPARGPDVVVDESCWSDLEFCKNTKNWYCYGKAVAEQAAWDEAKEKGVDLVVVNPVLVLGPLLQPTINASIVHILKYLTGSAKTYANSVQAYVHVKDVALAHILVYETPSASGRYLCAESVLHRGDVVEILAKFFPEYPIPTKLKDDGKPRAIPYKFSNQKLQDLGLEFTPVKQSLYDTVKSLQDKGHLKAPARQEEDSIKIQS*LKHILGATSRK*LQALTSKIW*QSEY*DLLMKGQ*KVVRVFV*VKTLTNGFFIY*ILIVIWFIYIYIDIYNSIIVFLNFM**CPRR 
atgcctgctgatcaaagctcttcactttccggccacggccaaactgtgtgtgtcaccggggccggaggcttcatcgcttcttggattgtgaagcttcttctggagagaggctataatgtgagaggaaccttgagaaacccagatgacccgaagaatgctcatttgagagagctggaaggagccacagagaggctgagcttgcgaaaagccgatcttctcgatttcgagagcctcaaagaagccattaacggctgcgatggcgttttccacacagcatcacctgtaactgatgatccggaacaaatggtggagccggcggtgaatggaacaaagaatgtgattgtggcggcatctgaagccaaggttaaacgcgtggtcttcacgtcttcaattggtgctgtctacatggaccccgccaggggtcccgatgtggttgtcgacgagtcctgctggagtgacctcgagttttgcaagaacaccaagaactggtactgctacgggaaagctgtggcggagcaagcagcatgggatgaggccaaagaaaagggggtggacttggtggtggtgaacccagttttggtgcttggaccactgctccaaccaactattaatgccagcattgtccacatcctcaagtacttgactggctcggccaagacttatgccaattcagttcaggcctatgtgcatgttaaggacgtggcattagctcacatactggtgtatgaaactccctctgcctcgggccgttacctttgcgccgagagtgtccttcaccggggagatgtcgttgaaatccttgccaagttcttccctgaatacccaatacccaccaagttgaaagatgatgggaaacccagagcaataccgtacaagttctcaaaccagaagcttcaagacctgggtttggagttcaccccagtgaaacagagcctatatgacactgtcaagagcttgcaggacaagggtcaccttaaagctcctgcaagacaagaagaagactccattaagatccaatcttaa 1023 0 MPADQSSSLSGHGQTVCVTGAGGFIASWIVKLLLERGYNVRGTLRNPDDPKNAHLRELEGATERLSLRKADLLDFESLKEAINGCDGVFHTASPVTDDPEQMVEPAVNGTKNVIVAASEAKVKRVVFTSSIGAVYMDPARGPDVVVDESCWSDLEFCKNTKNWYCYGKAVAEQAAWDEAKEKGVDLVVVNPVLVLGPLLQPTINASIVHILKYLTGSAKTYANSVQAYVHVKDVALAHILVYETPSASGRYLCAESVLHRGDVVEILAKFFPEYPIPTKLKDDGKPRAIPYKFSNQKLQDLGLEFTPVKQSLYDTVKSLQDKGHLKAPARQEEDSIKIQS* 341 0
Loading

0 comments on commit e638c9d

Please sign in to comment.