-
Notifications
You must be signed in to change notification settings - Fork 5
/
badgerBtab
executable file
·95 lines (90 loc) · 3.14 KB
/
badgerBtab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env ruby
require 'ostruct'
require 'optparse'
require 'rubygems'
require 'bio'
# Command-line settings. `top` starts out false and is switched on by -t/--top.
opt = OpenStruct.new(top: false)
ARGV.options do |parser|
  parser.banner << " blast-file [...blast-file...]"
  parser.on("-t", "--top", "only show top hit") do
    opt.top = true
  end
  begin
    # Strips recognised switches from ARGV, leaving the remaining arguments.
    parser.parse!
  rescue StandardError => error
    # On failure: print the reason, then the usage text, and exit non-zero.
    STDERR.puts error.message
    STDERR.puts parser
    exit(1)
  end
end
# Convert every BLAST report given on the command line into btab rows.
# With no arguments the report is read from STDIN and rows go to STDOUT;
# otherwise each input "name" gets its rows written to "name.btab".
ARGV.push(STDIN) if ARGV.empty?
ARGV.each do |blast|
  from_stdin = (blast == STDIN)
  out = from_stdin ? STDOUT : File.new(blast + ".btab", "w")
  report = nil
  begin
    report = Bio::Blast::Default::Report.open(blast, "r")
    report.each do |query|
      # 21 columns; positions that are never assigned stay as empty strings.
      fields = [""] * 21
      fields[0] = query.query_def.split(" ").first
      fields[2] = query.query_len
      fields[3] = query.program
      fields[4] = query.db
      begin
        query.each do |hit|
          hsp = hit.hsps.first
          fields[5] = hit.definition.split(" ").first
          fields[6] = hit.query_start
          fields[7] = hit.query_end
          fields[8] = hit.target_start
          fields[9] = hit.target_end
          # Percent identity: identities over the subject span length,
          # truncated (not rounded) to one decimal place.
          hit_len = (hit.target_start - hit.target_end).abs + 1
          fields[10] = ((hit.identity / hit_len.to_f) * 1000).to_i / 10.0
          fields[12] = hit.score
          fields[13] = hit.bit_score
          fields[15] = hit.definition
          fields[16] = hsp.query_frame
          fields[17] = hsp.query_strand
          fields[18] = query.db_len
          fields[19] = hit.evalue
          # The P-value column repeats the E-value.
          fields[20] = hit.evalue
          # Every field, including the last, is followed by a tab; this
          # trailing tab is kept so the output format is unchanged.
          out.write(fields.map { |field| "#{field}\t" }.join + "\n")
          break if opt.top
        end
      rescue
        # Best effort: abandon the rest of this query and move on to the next.
        # The notice is only printed when rows are going to a file.
        STDERR.printf("Skipping query %s...\n", fields[0]) unless from_stdin
      end
    end
  ensure
    # Release both handles even when an error escapes the per-query rescue.
    out.close
    # STDIN is left open, as before; only readers opened from a path are closed.
    report.close if report && !from_stdin
  end
end
# This is a BioRuby remake. The original BTAB program is described here:
# Dubnick M. (1992) Btab--a Blast output parser. Comput Appl Biosci 8(6):601-2
# The parsed BLAST output is presented as one line per hit (query frame and strand are
# taken from the hit's first HSP), with tab-delimited fields in the following order:
#
# [0] Query Sequence Name
# [2] Query Sequence Length
# [3] Search Method -- Blast family application name
# [4] Database Name
# [5] Subject Sequence Name -- Database entry name
# [6],[7] Query Left End, Query Right End -- The endpoints of the part
# of the query sequence which Blast aligns with the subject sequence.
# [8],[9] Subject Left End, Subject Right End -- The endpoints of the part
# of the subject sequence which Blast aligns with the query sequence.
# [10] Percent Identity -- The fraction of residues which are absolute
# matches between the query and subject sequence, expressed in
# percent.
# [12] HSP score
# [13] Bits score
# [15] Description -- A freeform text field which contains the biological
# description field from the database for the subject sequence.
# [16] Query Frame (1, 2, 3, -1, -2, -3)
# [17] Query Strand -- Plus, Minus or null
# [18] DB sequence length
# [19] Expect -- expected value
# [20] P-Value -- Poisson ratio (this implementation repeats the Expect value here)
#
# ** Note ** Intervening field positions which are not described are not currently supported.
# They are emitted as empty fields to remain compatible with other existing btab implementations.