-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathVAMP_feature.pl
56 lines (48 loc) · 1.66 KB
/
VAMP_feature.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Author: Jiwoong Kim ([email protected])
use strict;
use warnings;
# Turn every warning into a fatal error so that no problem passes silently.
local $SIG{__WARN__} = sub { die $_[0] };
use Cwd 'abs_path';
use Getopt::Long qw(:config no_ignore_case);

# Directory containing this script (absolute path with the file name removed);
# the data files are looked up in the VAMP_data directory beneath it.
my $vampPath = abs_path($0);
$vampPath =~ s/\/[^\/]*$//;
my $dataPath = "$vampPath/VAMP_data";

# Command-line switches: -h is the only one accepted.
my $help = '';
GetOptions('h' => \$help);

# Show the usage text and stop when help is requested or no input is named.
die <<EOF if $help || !@ARGV;
Usage: perl VAMP_feature.pl [options] VAMP.txt > VAMP_feature.txt
Options: -h display this help message
EOF

# The first remaining argument is the input table.
my $file = $ARGV[0];
# Index the genotype rows of the input table by cluster.
#
# Every input line is split on tabs (empty trailing columns are kept). The
# second column is the genotype, split on "|" into cluster, position and
# cluster amino-acid string. For each line whose genotype has a position,
# the hash receives
#     cluster => [ [start, end, <all columns of the line>], ... ]
# with start = position and end = position + length(cluster amino acids) - 1.
# Lines whose genotype has no position component are not indexed.
my %clusterStartEndTokenListListHash = ();
{
    # Three-argument open: the file name is taken literally, so characters
    # such as ">" or "|" in it are never interpreted as a mode or a command.
    # "-" is still read as standard input, which the two-argument form used
    # to provide implicitly.
    my $reader;
    if($file eq '-') {
        open($reader, '<&', \*STDIN) or die "Cannot read standard input: $!";
    } else {
        open($reader, '<', $file) or die "Cannot open '$file' for reading: $!";
    }
    while(my $line = <$reader>) {
        chomp($line);
        my @tokenList = split(/\t/, $line, -1);
        my $genotype = $tokenList[1];
        my ($cluster, $clusterPosition, $clusterAA) = split(/\|/, $genotype);
        next unless defined($clusterPosition);

        # Closed interval covered by the genotype on the cluster sequence.
        my $genotypeStart = $clusterPosition;
        my $genotypeEnd = $clusterPosition + length($clusterAA) - 1;
        push(@{$clusterStartEndTokenListListHash{$cluster}}, [$genotypeStart, $genotypeEnd, @tokenList]);
    }
    close($reader);
}
# Join the indexed genotypes with the cluster feature table and print the
# matches to standard output.
#
# Each line of VAMP_data/cluster.feature.txt is tab-separated with the columns
# cluster, start, end, cluster sequence, original, variation, type and
# description. A feature is reported for a genotype of the same cluster when
# either the feature has empty start and end columns, or the closed intervals
# [start, end] and [genotype start, genotype end] overlap. An output line is
# the genotype's original input columns followed by the eight feature columns,
# joined by tabs.
{
    my $featureFile = "$dataPath/cluster.feature.txt";
    # Fail immediately with the path and the system error if the data file
    # cannot be read, rather than failing later on the first read attempt.
    open(my $reader, '<', $featureFile) or die "Cannot open '$featureFile' for reading: $!";
    while(my $line = <$reader>) {
        chomp($line);
        my ($cluster, $start, $end, $clusterSequence, $original, $variation, $type, $description) = split(/\t/, $line, -1);

        # Only clusters that occur in the input table are of interest.
        my $startEndTokenListList = $clusterStartEndTokenListListHash{$cluster};
        next unless defined($startEndTokenListList);

        foreach my $startEndTokenList (@$startEndTokenListList) {
            my ($genotypeStart, $genotypeEnd, @tokenList) = @$startEndTokenList;
            if(($start eq '' && $end eq '') || ($start <= $genotypeEnd && $genotypeStart <= $end)) {
                print join("\t", @tokenList, $cluster, $start, $end, $clusterSequence, $original, $variation, $type, $description), "\n";
            }
        }
    }
    close($reader);
}