-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSR_10_blastp_keeplongesti.pl
58 lines (54 loc) · 1.45 KB
/
SR_10_blastp_keeplongesti.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
my $blastp = "/home/dwood/blastdb_prot/Ref_protein_sets/Transdecoder_dir/blast_results/blastp.outfmt6.emb";
my $transcriptome = "/home/dwood/blastdb_prot/Ref_protein_sets/Transdecoder_dir/blast_results/SU_RedSamp_k25mincov2.fasta.trans_filtered";
#my $transcriptome = $ARGV[0];
#my $blastp = $ARGV[1];
my ($line, $seq, $name, $clust, %length, %iname);
open(IN, "<$transcriptome");
while(!eof(IN)){
$line = readline *IN;
chomp $line;
if ($line =~ m/^>/){
$seq = readline *IN;
chomp $seq;
$name = $line;
$name =~ s/^>//g;
$name =~ s/ .*//g;
$clust = $name;
$clust =~ s/_i.*//g;
if ($length{$clust} < length($seq)){
$length{$clust} = length($seq);
$iname{$clust} = $name;
}
}else{
die "something went horrily wrong\n";
}
}
my (%hash, $item);
foreach $item (keys %iname){
print $iname{$item}."\t";
$hash{$iname{$item}} = "";
}
#So then filter the blast database for these guys (if they've got more than one protein...well,go for the one with the top bitscore...
my (%toprint);
open(IN, "<$blastp");
while(!eof(IN)){
$line = readline *IN;
chomp $line;
@temp = split/\t/, $line;
$blastname = $temp[0];
$blastname =~ s/\..*//g;
if (exists ($hash{$blastname})){
if (exists $toprint{$blastname}){
if ($temp[11] > (split/\t/, $toprint{$blastname})[11]){
$toprint{$blastname} = $line;
}
}
else{
$toprint{$blastname} = $line;
}
}
}
open(OUT, ">$blastp.longesti");
foreach $item (keys %toprint){
print OUT $toprint{$item}."\n";
}