Skip to content

Commit

Permalink
fix issue du to new templete, ID had to be changed too. Optimize firs…
Browse files Browse the repository at this point in the history
…t teest to see if L1 exists
  • Loading branch information
Juke34 committed Mar 21, 2024
1 parent 9212a33 commit 792e185
Showing 1 changed file with 81 additions and 66 deletions.
147 changes: 81 additions & 66 deletions lib/AGAT/OmniscientI.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1706,14 +1706,21 @@ sub _check_l2_linked_to_l3{
#L3 linked directly to L1
my $has_l1_feature = undef;
my $id_l2_to_replace = undef;
foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){ # primary_tag_key_level1 = gene or repeat etc...

# ------------ Check for L1 features ------------

# Check if we have one l1 with that ID among all L1/tag tuples
foreach my $tag_l1 (keys %{$hash_omniscient->{'level1'}}){
if( exists_keys ($hash_omniscient, ('level1', $tag_l1, $id_l2))){
# case where it's linked by parent/ID attribute
$has_l1_feature = $hash_omniscient->{"level1"}{$tag_l1}{$id_l2};
last;
}
else{
}

# No l1_feature found yet, let's check if one L1 feature with the common tag
if (! $has_l1_feature){
foreach my $tag_l1 (sort {$a cmp $b} keys %{$hash_omniscient->{'level1'}}){
if(! $common_tag_in_l1 ){
$common_tag_in_l1 = _create_hash_common_tag_l1($hash_omniscient);
} # fill it (only once) because will be needed
Expand All @@ -1724,99 +1731,107 @@ sub _check_l2_linked_to_l3{
#check if we have the tag

foreach my $l3_feature (@{$hash_omniscient->{'level3'}{$tag_l3}{$id_l2}}){
print "fefe ".$l3_feature->gff_string()."\n";
if($l3_feature->has_tag($tag) ) {

print "has_tag $tag\n";
# case where it's linked by comon_tag attribute
if (exists_keys($common_tag_in_l1,( $tag, lc($l3_feature->_tag_value($tag)) ) ) ){
print "exists_keys\n";
if($#{$common_tag_in_l1->{$tag}{lc($l3_feature->_tag_value($tag))}} == 0){
my $id = $common_tag_in_l1->{$tag}{lc($l3_feature->_tag_value($tag))}[0]->{'id'};
my $ptag = $common_tag_in_l1->{$tag}{lc($l3_feature->_tag_value($tag))}[0]->{'ptag'};
$has_l1_feature = $hash_omniscient->{'level1'}{$ptag}{$id};
$id_l2_to_replace = $l3_feature->_tag_value('Parent');
print "goes into it !!!! $id_l2_to_replace \n";
last;
}
else{
dual_print($log, "\n\nSeveral potential L1 parent with comnon tag $tag with value ".lc($l3_feature->_tag_value($tag)).". We do not use any\n", 0);
dual_print($log, "\n\nSeveral potential L1 parent with comnon tag $tag with value ".lc($l3_feature->_tag_value($tag)).". We do not use any\n", $verbose);
}
}
}
if ($has_l1_feature){last;}
}
}
}
}

if ($has_l1_feature){

my $l1_ID = $has_l1_feature->_tag_value('ID');
my $has_gemoma_l2=undef;
foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){
if ( exists_keys($hash_omniscient,("level2", $tag_l2, lc($l1_ID))) ) {
# HERE we have seen there is a L2. We want to use it and modify it Only
# in case of GeMoMa issue #290
# to make the difference we check that no level3 feature is attached to the detected l2
$has_gemoma_l2=$tag_l2;
foreach my $l2_feature ( @{$hash_omniscient->{'level2'}{$tag_l2}{lc($l1_ID)} } ){
foreach my $tag_l3 (sort {$a cmp $b} keys %{$hash_omniscient->{'level3'}}){
if ( exists_keys($hash_omniscient,("level3", $tag_l3, lc($l2_feature->_tag_value('ID')))) ) {
# Level3 features was attached to the L2, we cannot play the GeMoMa case
# otherwise interfer with other cases (level_missing.t tests testF, testB2 and gff_syntax.t 37_test.gff)
$has_gemoma_l2=undef;
}
# l1_feature found
if ($has_l1_feature){

my $l1_ID = $has_l1_feature->_tag_value('ID');
my $has_gemoma_l2=undef;
foreach my $tag_l2 (keys %{$hash_omniscient->{'level2'}}){
if ( exists_keys($hash_omniscient,("level2", $tag_l2, lc($l1_ID))) ) {
# HERE we have seen there is a L2. We want to use it and modify it Only
# in case of GeMoMa issue #290
# to make the difference we check that no level3 feature is attached to the detected l2
$has_gemoma_l2=$tag_l2;
foreach my $l2_feature ( @{$hash_omniscient->{'level2'}{$tag_l2}{lc($l1_ID)} } ){
foreach my $tag_l3 (sort {$a cmp $b} keys %{$hash_omniscient->{'level3'}}){
if ( exists_keys($hash_omniscient,("level3", $tag_l3, lc($l2_feature->_tag_value('ID')))) ) {
# Level3 features was attached to the L2, we cannot play the GeMoMa case
# otherwise interfer with other cases (level_missing.t tests testF, testB2 and gff_syntax.t 37_test.gff)
$has_gemoma_l2=undef;
}
}
}
}
}

my $l2_feature;
if (! $has_gemoma_l2) {
#create a copy of the first mRNA feature;
my $primary_tag_l2;
if (exists_keys($hash_omniscient,("level3",'cds', $id_l2) ) ){
$primary_tag_l2 = 'mRNA';
} # guess mRNA
else{ $primary_tag_l2 = 'RNA' ;} #we cannot guess

# Use a L3 feature as template (better as L1 see bug #441)
$l2_feature = clean_clone( { omniscient => $hash_omniscient,
feature => $hash_omniscient->{"level3"}{$tag_l3}{$id_l2}[0],
new_primary_tag => $primary_tag_l2
} );

} else {
$l2_feature = @{$hash_omniscient->{"level2"}{$has_gemoma_l2}{lc($l1_ID)}}[0];
$id_l2_to_replace = $has_l1_feature->_tag_value('ID');
delete $mRNAGeneLink->{lc($l2_feature->_tag_value('ID'))}
}
my $l2_feature;
if (! $has_gemoma_l2) {
#create a copy of the first mRNA feature;
my $primary_tag_l2;
if (exists_keys($hash_omniscient,("level3",'cds', $id_l2) ) ){
$primary_tag_l2 = 'mRNA';
} # guess mRNA
else{ $primary_tag_l2 = 'RNA' ;} #we cannot guess

# Use a L3 feature as template (better as L1 see bug #441)
$l2_feature = clean_clone( { omniscient => $hash_omniscient,
feature => $hash_omniscient->{"level3"}{$tag_l3}{$id_l2}[0],
new_primary_tag => $primary_tag_l2,
new_id => $hash_omniscient->{"level3"}{$tag_l3}{$id_l2}[0]->_tag_value('Parent')
} );

} else {
$l2_feature = @{$hash_omniscient->{"level2"}{$has_gemoma_l2}{lc($l1_ID)}}[0];
$id_l2_to_replace = $has_l1_feature->_tag_value('ID');
delete $mRNAGeneLink->{lc($l2_feature->_tag_value('ID'))}
}

#Modify parent L2 (and L1 id if necessary)
create_or_replace_tag($l2_feature,'Parent', $l1_ID); #modify ID to replace by parent value
create_or_replace_tag($l2_feature,'ID', $id_l2_to_replace) if ($id_l2_to_replace); #modify ID to replace by parent value
check_level2_positions($hash_omniscient, $l2_feature);

if ( exists_keys ($hashID,('uid', lc($id_l2) ) ) ){ #the easiest is to modifiy the gene id

my $new_l1id = _check_uniq_id_feature($hash_omniscient, $hashID, $has_l1_feature); # to check if ID was already in used by level1 feature
create_or_replace_tag($l2_feature,'Parent', $new_l1id); #modify ID to replace by parent value
my $primary_tag_l1 =$has_l1_feature->primary_tag();
$hash_omniscient->{"level1"}{lc($primary_tag_l1)}{lc($new_l1id)} = delete $hash_omniscient->{"level1"}{lc($primary_tag_l1)}{lc($l1_ID)}; # now save it in omniscient
#fill the $mRNAGeneLink hash
$mRNAGeneLink->{ $id_l2 } = $new_l1id;
if ($has_gemoma_l2){
$hash_omniscient->{'level2'}{lc($l2_feature->primary_tag)}{lc($new_l1id)} = delete $hash_omniscient->{'level2'}{lc($l2_feature->primary_tag)}{lc($l1_ID)};
}
else {
push(@{$hash_omniscient->{"level2"}{lc($l2_feature->primary_tag)}{lc($new_l1id)}}, $l2_feature);
}
#Modify parent L2 (and L1 id if necessary)
create_or_replace_tag($l2_feature,'Parent', $l1_ID); #modify ID to replace by parent value
create_or_replace_tag($l2_feature,'ID', $id_l2_to_replace) if ($id_l2_to_replace); #modify ID to replace by parent value
check_level2_positions($hash_omniscient, $l2_feature);

if ( exists_keys ($hashID,('uid', lc($id_l2) ) ) ){ #the easiest is to modifiy the gene id

my $new_l1id = _check_uniq_id_feature($hash_omniscient, $hashID, $has_l1_feature); # to check if ID was already in used by level1 feature
create_or_replace_tag($l2_feature,'Parent', $new_l1id); #modify ID to replace by parent value
my $primary_tag_l1 =$has_l1_feature->primary_tag();
$hash_omniscient->{"level1"}{lc($primary_tag_l1)}{lc($new_l1id)} = delete $hash_omniscient->{"level1"}{lc($primary_tag_l1)}{lc($l1_ID)}; # now save it in omniscient
#fill the $mRNAGeneLink hash
$mRNAGeneLink->{ $id_l2 } = $new_l1id;
if ($has_gemoma_l2){
$hash_omniscient->{'level2'}{lc($l2_feature->primary_tag)}{lc($new_l1id)} = delete $hash_omniscient->{'level2'}{lc($l2_feature->primary_tag)}{lc($l1_ID)};
}
else{
#fill the $mRNAGeneLink hash
$mRNAGeneLink->{ $id_l2 } = $l1_ID; # Always need to keep track about l2->l1, else the method _check_l2_linked_to_l3 will recreate a l1 thinking this relationship is not fill
push(@{$hash_omniscient->{"level2"}{lc($l2_feature->primary_tag)}{lc($l1_ID)}}, $l2_feature);
else {
push(@{$hash_omniscient->{"level2"}{lc($l2_feature->primary_tag)}{lc($new_l1id)}}, $l2_feature);
}
dual_print($log, "L3 had a L1 feature but no L2 feature. Corrected by creating the intermediate L2 feature:\n".$l2_feature->gff_string()."\n", $verbose);
last
}
else{
#fill the $mRNAGeneLink hash
$mRNAGeneLink->{ $id_l2 } = $l1_ID; # Always need to keep track about l2->l1, else the method _check_l2_linked_to_l3 will recreate a l1 thinking this relationship is not fill
push(@{$hash_omniscient->{"level2"}{lc($l2_feature->primary_tag)}{lc($l1_ID)}}, $l2_feature);
}
dual_print($log, "L3 had a L1 feature but no L2 feature. Corrected by creating the intermediate L2 feature:\n".$l2_feature->gff_string()."\nUsing this feature template:\n".$hash_omniscient->{"level3"}{$tag_l3}{$id_l2}[0]->gff_string()."\n", $verbose);
last
}


# ------------ No L1 features found ------------
# it was not previous case (L3 linked directly to L1)
if (! exists_keys($mRNAGeneLink,($id_l2 ) ) ) {
# ---- start fill L2 ----
Expand Down

0 comments on commit 792e185

Please sign in to comment.