-
Notifications
You must be signed in to change notification settings - Fork 0
/
gff2windowsbar4TE.pl
111 lines (94 loc) · 2.42 KB
/
gff2windowsbar4TE.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/perl
=header
The script is designed to generate a bar.txt file for sliding window view repeat density on genome.
perl ../bin/gff2windowsbar4TE.pl -chrlen IRGSP.chrlen -bar rice_repeat_bar -gff IRGSP.build5.RepeatMasker.out.gff.chr > log 2> log2 &
-chrlen: chr length
-bar: directory for output bar file
-gff: directory for input gff file
=cut
use Getopt::Long;
use warnings;
use strict;
our %opt;
GetOptions(\%opt,"window:s","step:s","chrlen:s","bar:s","gff:s","help");
if ($opt{help} or keys %opt < 1){
print "Usage: run perl $0 -chrlen rice.chrlen -bar rice_repeat_bar -gff IRGSP.build5.RepeatMasker.out.gff.chr\n";
exit();
}
$opt{window} ||= 500000;
$opt{step} ||= 50000;
`mkdir $opt{bar}` unless (-e $opt{bar});
my $refchr=chrlen($opt{chrlen});
my @file=glob("$opt{gff}/chr*");
chomp @file;
foreach(@file){
my $file=$_;
if ($_=~/(chr\d+)/){
my $chr=$1;
my $type="TE";
print "$file\n";
BEDdensity($file,$chr,$type,$refchr->{$chr},$opt{window},$opt{step});
}
}
#system ("/home/jfchen/software/barloader/barloader $opt{bar}");
##############
sub BEDdensity
{
my ($file,$chr,$type,$chrlen,$win,$step)=@_;
my %hash;
my %meth;
open IN, "$file" or die "$!";
while (<IN>){
chomp $_;
#print "$_\n";
next if ($_ eq "" );
next if ($_ =~/^chrUN/);
next if ($_ =~/^#/);
my $line=$_;
sliding($line,$win,$step,\%hash,\%meth);
}
close IN;
open OUT, ">$opt{bar}/$chr.$type.bar.txt" or die "$!";
foreach(sort {$a <=> $b} keys %meth){
my $density=100*$meth{$_}/$win;
print OUT "$chr\t$_\t$density\n";
}
close OUT;
}
#################
sub sliding
{
my ($line,$win,$step,$hash,$meth)=@_;
my @unit=split("\t",$line);
my $bin=int ($unit[3]/$step) + 1;
for(my $i=$bin;$i>0;$i--){
my $start=($i-1)*$step;
my $end=$start+$win;
#print "$i\t$unit[1]\t$start\t$end\n";
if ($unit[3] >= $start and $unit[4] <= $end) {
$meth->{$start}+=$unit[4]-$unit[3]+1;
}elsif($unit[3] < $start and $unit[4] > $start){
$meth->{$start}+=$unit[4]-$start+1;
}elsif($unit[3] < $end and $unit[4] > $end){
$meth->{$start}+=$end-$unit[3]+1;
}elsif($unit[3] >= $end or $unit[4] <= $start){
return;
}
}
return;
}
#################
### get chr length hash
sub chrlen
{
my ($file)=@_;
my %hash;
open IN, "$file" or die "$!";
while(<IN>){
chomp $_;
my @unit=split("\t",$_);
$hash{$unit[0]}=$unit[1];
}
close IN;
return \%hash;
}