• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ensembl / ensembl-variation / #610355590

22 Sep 2023 02:48PM UTC coverage: 83.241%. First build
#610355590

Pull #1040

travis-ci

Pull Request #1040: Report consequences for each breakend

134 of 134 new or added lines in 6 files covered. (100.0%)

22570 of 27114 relevant lines covered (83.24%)

1430.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.48
/modules/Bio/EnsEMBL/Variation/Utils/VariationEffect.pm
1
=head1 LICENSE
2

3
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4
Copyright [2016-2025] EMBL-European Bioinformatics Institute
5

6
Licensed under the Apache License, Version 2.0 (the "License");
7
you may not use this file excepst in compliance with the License.
8
You may obtain a copy of the License at
9

10
     http://www.apache.org/licenses/LICENSE-2.0
11

12
Unless required by applicable law or agreed to in writing, software
13
distributed under the License is distributed on an "AS IS" BASIS,
14
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
See the License for the specific language governing permissions and
16
limitations under the License.
17

18
=cut
19

20

21
=head1 CONTACT
22

23
 Please email comments or questions to the public Ensembl
24
 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25

26
 Questions may also be sent to the Ensembl help desk at
27
 <http://www.ensembl.org/Help/Contact>.
28

29
=cut
30

31
=head1 NAME
32

33
Bio::EnsEMBL::Variation::Utils::VariationEffect
34

35
=head1 DESCRIPTION
36

37
This module defines a set of predicate subroutines that check the effect of a
38
Bio::EnsEMBL::Variation::VariationFeature on some other Bio::EnsEMBL::Feature. 
39
All of these predicates take a VariationFeatureOverlapAllele as their first and
40
only argument and return a true or false value depending on whether the effect
41
being checked for holds or not. The link between these predicates and the 
42
specific effect is configured in the Bio::EnsEMBL::Variation::Utils::Config 
43
module and a list of OverlapConsequence objects that represent a link between,
44
for example, a Sequence Ontology consequence term, and the predicate that
45
checks for it is provided in the Bio::EnsEMBL::Variation::Utils::Constants
46
module. If you want to add a new consequence you should write a predicate in 
47
this module and then add an entry in the configuration file.
48

49
=cut
50

51
package Bio::EnsEMBL::Variation::Utils::VariationEffect;
52

53
use strict;
68✔
54
use warnings;
68✔
55

56
use base qw(Exporter);
68✔
57

58
our @EXPORT_OK = qw(overlap _intron_overlap _compare_seq_region_names within_feature within_cds MAX_DISTANCE_FROM_TRANSCRIPT within_intron stop_lost stop_retained start_lost frameshift $UPSTREAM_DISTANCE $DOWNSTREAM_DISTANCE $CHROMOSOME_SYNONYMS);
59

60
use constant MAX_DISTANCE_FROM_TRANSCRIPT => 5000;
68✔
61

62
our $UPSTREAM_DISTANCE = MAX_DISTANCE_FROM_TRANSCRIPT;
63
our $DOWNSTREAM_DISTANCE = MAX_DISTANCE_FROM_TRANSCRIPT;
64
our $CHROMOSOME_SYNONYMS = {};
65

66
#
67
# Interface with some of the module function XS reimplementation
68
# 
69
# If Bio::EnsEMBL::XS is installed, assign the function glob to
70
# the XS counterpart, otherwise assign to the original function
71
#
72
BEGIN {
68✔
73
  if (eval { require Bio::EnsEMBL::XS; 1 }) {
×
74
    *overlap = \&Bio::EnsEMBL::XS::Variation::Utils::VariationEffect::overlap;
75
  } else {
68✔
76
    *overlap = \&overlap_perl;
77
  }
78
}
79

80
# these two methods are perl implementations of the above Inline C
81
sub overlap_perl {
4,280✔
82
    my ( $f1_start, $f1_end, $f2_start, $f2_end ) = @_;
83
   
4,280✔
84
    return ( ($f1_end >= $f2_start) and ($f1_start <= $f2_end) );
85
}
86

87
sub _intron_overlap {
83✔
88
  my ($vf_start, $vf_end, $intron_start, $intron_end, $insertion) = @_;
89
  
83✔
90
  if(
91
           overlap($vf_start, $vf_end, $intron_start+2, $intron_start+7) ||
92
           overlap($vf_start, $vf_end, $intron_end-7,   $intron_end-2  ) ||
93
    overlap($vf_start, $vf_end, $intron_start-3, $intron_start-1) ||
94
           overlap($vf_start, $vf_end, $intron_end+1,   $intron_end+3  ) ||
95
           (
96
      $insertion && (
97
               $vf_start == $intron_start   ||
98
               $vf_end   == $intron_end     ||
99
               $vf_start == $intron_start+2 ||
100
               $vf_end   == $intron_end-2
101
      )
102
    )
103
  ) {
66✔
104
    return 1;
105
  }
106
  else {
17✔
107
    return 0;
108
  }
109
}
110

111
sub _compare_seq_region_names {
15✔
112
  my ($region1, $region2) = @_;
15✔
113
  if ($CHROMOSOME_SYNONYMS) {
114
    return 1 if
15✔
115
      (
15✔
116
        exists $CHROMOSOME_SYNONYMS->{$region1} &&
117
        grep /^$region2$/, keys %{$CHROMOSOME_SYNONYMS->{$region1}}
15✔
118
      ) || (
119
        exists $CHROMOSOME_SYNONYMS->{$region2} &&
120
        grep /^$region1$/, keys %{$CHROMOSOME_SYNONYMS->{$region2}}
121
      );
143✔
122
  }
143✔
123
  return lc $region1 eq lc $region2;
143✔
124
}
143✔
125

126
sub within_feature {
143✔
127
    my ($bvfoa, $feat, $bvfo, $bvf, $match_seq_region_names) = @_;
143✔
128
    $bvf  ||= $bvfoa->base_variation_feature;
×
129
    $feat ||= $bvfoa->feature;
×
130
    $match_seq_region_names ||= 0;
131

132
    my $cmp_chr = 1;
143✔
133
    if ($match_seq_region_names) {
134
      my $chr  = $bvf->{chr} || $bvf->{slice}->{seq_region_name};
135
      $cmp_chr = _compare_seq_region_names($chr, $feat->{slice}->{seq_region_name});
136
    }
137

138
    return $cmp_chr && overlap(
139
        $bvf->{start}, 
140
        $bvf->{end},
141
        $feat->{start}, 
14✔
142
        $feat->{end}
14✔
143
    );
14✔
144
}
145

146
sub partial_overlap_feature {
14✔
147
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
148
    $bvf  ||= $bvfoa->base_variation_feature;
149
    $feat ||= $bvfoa->feature;
150
    
151
    return (
152
        within_feature(@_) and 
153
        (not complete_overlap_feature(@_)) and
288✔
154
        (($bvf->{end} > $feat->{end}) or ($bvf->{start} < $feat->{start}))
288✔
155
    );
288✔
156
}
157

158
sub complete_within_feature {
288✔
159
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
160
    $bvf  ||= $bvfoa->base_variation_feature;
161
    $feat ||= $bvfoa->feature;
162
    
163
    return (
164
        ($bvf->{start} >= $feat->{start}) and 
1,189✔
165
        ($bvf->{end} <= $feat->{end})
1,189✔
166
    );
1,189✔
167
}
168

169
sub complete_overlap_feature {
1,189✔
170
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
171
    $bvf  ||= $bvfoa->base_variation_feature;
172
    $feat ||= $bvfoa->feature;
173
    
174
    return ( 
175
        ($bvf->{start} <= $feat->{start}) and 
176
        ($bvf->{end} >= $feat->{end}) 
120✔
177
    );
178
}
120✔
179

×
180
sub _supporting_cnv_terms {
181
  #if variant is CNV, return class SO terms for its supporting variants
×
182
  my $bvf = shift;
×
183

×
184
  return if $bvf->class_SO_term(undef, 1) ne "copy_number_variation";
185
  return unless defined $bvf->structural_variation;
186

187
  my $support_vars  = $bvf->structural_variation->get_all_SupportingStructuralVariants;
68✔
188
  my @support_terms = map { $_->class_SO_term } @{$support_vars};
68✔
189
  return @support_terms;
190
}
191

68✔
192
sub deletion {
2✔
193
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
194
    $bvf ||= $bvfoa->base_variation_feature;
2✔
195
    
196
    # sequence variant will have alleles
197
    if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
198
        my ($ref_allele, $alt_allele) = _get_alleles(@_);
199
        return (
200
            (defined($ref_allele) && ($alt_allele eq '' || length($alt_allele) < length($ref_allele)) and $ref_allele) or
66✔
201
            $bvf->allele_string =~ /deletion/i
202
        );
66✔
203
    }
204
    
205
    # structural variant depends on class
206
    if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
207
      return (
208
          copy_number_loss(@_) or
209
          ($bvf->class_SO_term(undef, 1) eq 'deletion') or
×
210
          ($bvf->class_SO_term(undef, 1) =~ /deletion/i) or
211
          grep(/deletion/i, _supporting_cnv_terms($bvf))
212
      );
213
    }
×
214
    
×
215
    else { return 0; }
216
}
217

×
218
sub insertion {
×
219
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
220
    $bvf ||= $bvfoa->base_variation_feature;
×
221
    
222
    # sequence variant will have alleles
223
    if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
224
        my ($ref_allele, $alt_allele) = _get_alleles(@_);
225
        return (
226
            (defined($ref_allele) && ($ref_allele eq '' || length($alt_allele) > length($ref_allele)) and $alt_allele) or
×
227
            $bvf->allele_string =~ /insertion/i
×
228
        );
229
    }
230
    
×
231
    # structural variant depends on class
232
    if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
233
      my $class_SO_term = $bvf->class_SO_term(undef, 1);
234

235
      return (
236
          copy_number_gain(@_) or
237
          ($bvf->class_SO_term(undef, 1) eq 'insertion') or
×
238
          ($bvf->class_SO_term(undef, 1) =~ /insertion/i) or
239
          grep(/insertion/i, _supporting_cnv_terms($bvf))
240
      );
241
    }
8✔
242
    
8✔
243
    else { return 0; }
244
}
245

8✔
246
sub copy_number_gain {
247
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
248
    $bvf ||= $bvfoa->base_variation_feature;
249

250
    return (
251
        duplication(@_) or
252
        tandem_repeat(@_) or
253
        $bvf->class_SO_term(undef, 1) =~ /gain/i or
88✔
254
        grep(/gain/i, _supporting_cnv_terms($bvf))
88✔
255
    );
256
}
257

88✔
258
sub copy_number_loss {
259
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
260
    $bvf ||= $bvfoa->base_variation_feature;
261
    
262
    return (
263
      $bvf->class_SO_term(undef, 1) =~ /loss/i or
8✔
264
      grep(/loss/i, _supporting_cnv_terms($bvf))
8✔
265
    );
266
}
267

268
sub duplication {
8✔
269
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
270
    $bvf ||= $bvfoa->base_variation_feature;
271
    
272
    return (
273
        (
274
            ($bvf->class_SO_term(undef, 1) eq 'duplication') or
275
            ($bvf->class_SO_term(undef, 1) =~ /duplication/i) or
276
            grep(/duplication/i, _supporting_cnv_terms($bvf))
277
        ) and
8✔
278
        (not tandem_repeat(@_))
8✔
279
    );
280
}
281

8✔
282
sub tandem_repeat {
×
283
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
284
    $bvf ||= $bvfoa->base_variation_feature;
×
285
    
×
286
    # for sequence variants, check sequence vs ref
287
    if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
288
        my ($ref_allele, $alt_allele) = _get_alleles(@_);
289
        
×
290
        return 0 unless $ref_allele and $alt_allele;
291
        return 0 unless
×
292
            length($alt_allele) > length($ref_allele) and
293
            length($alt_allele) % length($ref_allele) == 0;
294
        
295
        my $copies = length($alt_allele) / length($ref_allele);
8✔
296
        
297
        return $alt_allele eq $ref_allele x $copies;
8✔
298
    }
299
    
300
    # structural variant depends on class
301
    if($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
302
        return (
303
            ($bvf->class_SO_term(undef, 1) eq 'tandem_duplication') or
304
            ($bvf->class_SO_term(undef, 1) eq 'tandem_repeat') or
305
            ($bvf->class_SO_term(undef, 1) =~ /tandem/i) or
14✔
306
            grep(/tandem/i, _supporting_cnv_terms($bvf))
14✔
307
        );
308
    }
14✔
309
}
310

14✔
311
sub chromosome_breakpoint {
312
  my ($bvfoa, $feat, $bvfo, $bvf) = @_;
313
  $bvf ||= $bvfoa->base_variation_feature;
314

315
  if ($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
316
    return (
317
      ($bvf->class_SO_term(undef, 1) eq 'chromosome_breakpoint') or
6✔
318
      ($bvf->class_SO_term(undef, 1) =~ /chromosome_breakpoint/i)
6✔
319
    );
320
  }
6✔
321
}
322

323
sub feature_ablation {
324
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
×
325
    $feat ||= $bvfoa->base_variation_feature_overlap->feature;
×
326
    
327
    return (complete_overlap_feature($bvfoa, $feat, $bvfo, $bvf) and deletion(@_));
×
328
}
329

330
sub feature_amplification {
331
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
12✔
332
    $feat ||= $bvfoa->base_variation_feature_overlap->feature;
12✔
333
    
334
    return (complete_overlap_feature($bvfoa, $feat, $bvfo, $bvf) and copy_number_gain(@_));
12✔
335
}
336

337
sub feature_elongation {
12✔
338
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
339
    $feat ||= $bvfoa->base_variation_feature_overlap->feature;
340
    
341
    return 0 if $bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele');
342
    
343
    return (
14✔
344
        within_cdna(@_) and
14✔
345
        complete_within_feature($bvfoa, $feat, $bvfo, $bvf) and
14✔
346
        (copy_number_gain(@_) or insertion(@_))
347
    );
14✔
348
}
349

14✔
350
sub feature_truncation {
×
351
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
352
    $bvf  ||= $bvfoa->base_variation_feature;
353
    $feat ||= $bvfoa->base_variation_feature_overlap->feature;
354

355
    return 0 if $bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele');
14✔
356
    
357
    if(chromosome_breakpoint(@_)) {
358
        return 1 if within_feature($bvfoa, $feat, $bvfo, $bvfoa->breakend, 1);
359
    }
360

361
    # require transcripts (but not other feature types) to be within cDNA
362
    return 0 if $feat->isa('Bio::EnsEMBL::Transcript') and not within_cdna(@_);
363

364
    return (
365
        (
366
            partial_overlap_feature($bvfoa, $feat, $bvfo, $bvf) or
367
            complete_within_feature($bvfoa, $feat, $bvfo, $bvf)
364✔
368
        ) and
369
        (
364✔
370
            copy_number_loss(@_) or deletion(@_)
371
        )
372
    );
373
}
283✔
374

40✔
375
sub protein_altering_variant{
36✔
376

377
    ## check in protein
27✔
378
    my ($ref_pep, $alt_pep) = _get_peptide_alleles(@_);
18✔
379

16✔
380
    return 0 unless defined $ref_pep && defined $alt_pep;
381

2✔
382
    ## don't assign if child term appropriate
383

384
    return 0 if  length($alt_pep) eq length($ref_pep);       # synonymous_variant(@_);  missense_variant(@_);
385
    return 0 if  $ref_pep =~/^\*/  || $alt_pep =~/^\*/;      # stop lost/ gained/ retained
386
    return 0 if  $alt_pep =~/^\Q$ref_pep\E|\Q$ref_pep\E$/;   # inframe_insertion(@_);
387

388
    return 0 if inframe_deletion(@_);  
389
    return 0 if start_lost(@_);
390
    return 0 if frameshift(@_);
391

392
    return 1;
393
}
394

53✔
395
#sub transcript_fusion {
396
#    #my ($bvfoa, $feat, $bvfo, $bvf) = @_;
53✔
397
#    #my $bvf   = $bvfoa->base_variation_feature;
398
#    
399
#    return 0;
400
#    
401
#    #my $transcripts = $bvf->_get_overlapping_Transcripts();
53✔
402
#}
53✔
403

404
sub _before_start {
405
    my ($bvf, $feat, $dist) = @_;
406
    
407
    return ( ($bvf->{end} >= ($feat->{start} - $dist)) and 
53✔
408
        ($bvf->{end} < $feat->{start}) );
53✔
409
}
53✔
410

411
sub _after_end {
412
    my ($bvf, $feat, $dist) = @_;
413
    return ( ($bvf->{start} <= ($feat->{end} + $dist)) 
414
            and ($bvf->{start} > $feat->{end}) );
415
}
53✔
416

53✔
417
sub _upstream {
53✔
418
    my ($bvf, $feat, $dist) = @_;
419
    $dist += get_max_shift_length($bvf);
420
    return $feat->strand == 1 ? 
421
        _before_start($bvf, $feat, $dist) : 
422
        _after_end($bvf, $feat, $dist);
423
}
106✔
424

106✔
425
sub _downstream {
106✔
426
    my ($bvf, $feat, $dist) = @_;
×
427
    $dist += get_max_shift_length($bvf);
428
    return $feat->strand == 1 ? 
106✔
429
        _after_end($bvf, $feat, $dist) : 
430
        _before_start($bvf, $feat, $dist);
431
}
432

433
sub get_max_shift_length {
434
  my ($bvf) = shift;
53✔
435
  my $max_shift_length = 0;
53✔
436
  foreach my $hash (@{$bvf->{tva_shift_hashes}}){
53✔
437
    $max_shift_length = $hash->{shift_length} if $hash->{shift_length} > $max_shift_length;
438
  }
53✔
439
  return $max_shift_length;
440
}
441

442
#package Bio::EnsEMBL::Variation::TranscriptVariationAllele;
53✔
443

53✔
444
sub upstream {
53✔
445
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
446
    $bvf  ||= $bvfoa->base_variation_feature;
53✔
447
    $feat ||= $bvfoa->base_variation_feature_overlap->feature;
448

449
    return _upstream($bvf, $feat, $UPSTREAM_DISTANCE);
450
}
×
451

452
sub downstream {
×
453
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
454
    $bvf  ||= $bvfoa->base_variation_feature;
×
455
    $feat ||= $bvfoa->base_variation_feature_overlap->feature;
456

457
    return _downstream($bvf, $feat, $DOWNSTREAM_DISTANCE);
458
}
459

460
sub affects_transcript {
461
    my ($bvf, $tran) = @_;
462
    
463
    return 0 unless $tran->isa('Bio::EnsEMBL::Transcript');
98✔
464
    
465
    return overlap(
466
        $bvf->{start}, 
467
        $bvf->{end},
20✔
468
        $tran->{start} - 5000, 
20✔
469
        $tran->{end} + 5000
470
    );
20✔
471
}
472

473
sub within_transcript {
474
    return within_feature(@_);
×
475
}
×
476

477
sub within_nmd_transcript {
×
478
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
479
    $feat ||= $bvfoa->base_variation_feature_overlap->feature;
480

481
    return ( within_transcript(@_) and ($feat->biotype eq 'nonsense_mediated_decay') );
395✔
482
}
483

484
sub within_coding_gene {
485
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
73✔
486
    $feat ||= $bvfoa->base_variation_feature_overlap->feature;
73✔
487

488
    return ( within_transcript(@_) and $feat->translation );
73✔
489
}
490

491
sub coding_transcript_variant {
492
    return ( (not coding_unknown(@_)) and complete_overlap_feature(@_) and within_coding_gene(@_) );
85✔
493
}
85✔
494

495
sub within_non_coding_gene {
85✔
496
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
497
    $feat ||= $bvfoa->base_variation_feature_overlap->feature;
498

499
    return ( within_transcript(@_) and (not $feat->translation) and (not within_mature_miRNA(@_)) and (not non_coding_exon_variant(@_)) );
500
}
38✔
501

502
sub non_coding_exon_variant {
38✔
503
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
24✔
504
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
505
    
506
    return 0 if complete_overlap_feature(@_) or $feat->translation or within_mature_miRNA(@_);
14✔
507
    
508
    # get overlapped exons
509
    # this may include some non-overlapping ones in the case of transcripts with frameshift introns
510
    # so we double check with overlap()
511
    my $exons = $bvfo->_overlapped_exons;
×
512
    
513
    if(scalar grep {overlap($bvf->{start}, $bvf->{end}, $_->{start}, $_->{end})} @$exons) {
514
        return 1;
515
    }
×
516
    else {
×
517
        return 0;
518
    }
×
519
}
520

521
sub within_miRNA {
522
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
117✔
523
    
117✔
524
    # don't call this for now
117✔
525
    
117✔
526
    return 0;
527
    $feat ||= $bvfoa->base_variation_feature_overlap->feature;
117✔
528
    
529
    return ( ($feat->biotype eq 'miRNA') and within_transcript(@_) );
5✔
530
}
531

5✔
532
sub within_mature_miRNA {
5✔
533
  my ($bvfoa, $feat, $bvfo, $bvf) = @_;
5✔
534
  $bvfo ||= $bvfoa->base_variation_feature_overlap;
5✔
535
  $bvf  ||= $bvfo->base_variation_feature;
536
  $feat ||= $bvfo->feature;
537

538
  return 0 unless ( ($feat->biotype eq 'miRNA') and within_transcript(@_) );
539

1✔
540
  foreach my $attribute(@{ $feat->get_all_Attributes('miRNA') }) {
541

542
    if (defined $attribute && $attribute->value =~ /(\d+)-(\d+)/) {
543
      for my $coord ($bvfo->_mapper->cdna2genomic($1, $2)) {
544
        if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
545
          if (overlap(
546
              $bvf->seq_region_start(), 
4✔
547
              $bvf->seq_region_end(), 
548
              $coord->start, 
549
              $coord->end) ) {
550
            return 1;
200✔
551
          }
200✔
552
        }
200✔
553
      }
554
    }
200✔
555
  }
556

200✔
557
  return 0;
558
}
559

560
sub donor_splice_site {
561
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
562
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
185✔
563
    $feat ||= $bvfo->feature;
185✔
564

185✔
565
    my $ie = $bvfoa->_intron_effects($feat, $bvfo, $bvf);
566
    
185✔
567
    return $feat->strand == 1 ? 
568
        $ie->{start_splice_site} :
185✔
569
        $ie->{end_splice_site};
570
}
571

572
sub acceptor_splice_site {
573
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
574
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
50✔
575
    $feat ||= $bvfo->feature;
576

577
    my $ie = $bvfoa->_intron_effects($feat, $bvfo, $bvf);
578
    
247✔
579
    return $feat->strand == 1 ? 
247✔
580
        $ie->{end_splice_site} :
247✔
581
        $ie->{start_splice_site};
247✔
582
}
583

247✔
584
sub essential_splice_site {
585
    return ( acceptor_splice_site(@_) or donor_splice_site(@_) );
586
}
587

125✔
588
sub splice_donor_5th_base_variant {
125✔
589
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
125✔
590
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
125✔
591
    $feat ||= $bvfo->feature;
592
    my $ie = $bvfoa->_intron_effects($feat, $bvfo, $bvf);
125✔
593

122✔
594
    return $feat->strand == 1 ? $ie->{fifth_base_splice_site} : $ie->{fifth_base_splice_site_reverse};
595
}
596

597
sub splice_donor_region_variant {
103✔
598
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
103✔
599
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
103✔
600
    $feat ||= $bvfo->feature;
103✔
601
    my $ie = $bvfoa->_intron_effects($feat, $bvfo, $bvf);
602
    
103✔
603
    return 0 if splice_donor_5th_base_variant(@_);
604
    return $feat->strand == 1 ? $ie->{donor_region_splice_site} : $ie->{donor_region_splice_site_reverse};
605
}
606

75✔
607
sub splice_polypyrimidine_tract_variant  {
75✔
608
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
609
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
75✔
610
    $feat ||= $bvfo->feature;
60✔
611
    my $ie = $bvfoa->_intron_effects($feat, $bvfo, $bvf);
50✔
612

50✔
613
    return $feat->strand == 1 ? $ie->{polypyrimidine_splice_site} : $ie->{polypyrimidine_splice_site_reverse};
47✔
614
}
615

47✔
616
sub splice_region {
617
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
618
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
619
    
141✔
620
    return 0 if donor_splice_site(@_);
141✔
621
    return 0 if acceptor_splice_site(@_);
622
    return 0 if essential_splice_site(@_);
141✔
623
    return 0 if splice_donor_region_variant(@_);
624
    return 0 if splice_donor_5th_base_variant(@_);
625
    
626
    return $bvfoa->_intron_effects($feat, $bvfo, $bvf)->{splice_region};
716✔
627
}
716✔
628

716✔
629
sub within_intron {
716✔
630
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
631
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
716✔
632

633
    return $bvfoa->_intron_effects($feat, $bvfo, $bvf)->{intronic};
716✔
634
}
716✔
635

732✔
636
sub within_cds {
598✔
637
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
598✔
638
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
639
    $feat ||= $bvfo->feature;
640
    $bvf  ||= $bvfo->base_variation_feature;
641
    
642
    my $cds_coords = $bvfo->cds_coords;
643
    
644
    if (@$cds_coords > 0) {
645
        for my $coord (@$cds_coords) {
118✔
646
            if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
647
                if ($coord->end > 0 && $coord->start <= length($bvfo->_translateable_seq)) { 
648
                    return 1;
2✔
649
                }
650
            }
651
        }
652
    }
653

654
    # we also need to check if the vf is in a frameshift intron within the CDS
655

656
    if (defined $feat->translation &&
116✔
657
        $bvfoa->_intron_effects($feat, $bvfo, $bvf)->{within_frameshift_intron}) {
658
 
659
        return overlap(
660
            $bvf->{start}, 
47✔
661
            $bvf->{end}, 
47✔
662
            $feat->coding_region_start,
47✔
663
            $feat->coding_region_end,
664
        );
47✔
665
    }
666
        
47✔
667
    return 0;
47✔
668
}
47✔
669

47✔
670
sub within_cdna {
47✔
671
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
672
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
673
    $feat ||= $bvfo->feature;
674
    
675
    my $cdna_coords = $bvfo->cdna_coords;
676
    
677
    if (@$cdna_coords > 0) {
678
        for my $coord (@$cdna_coords) {
×
679
            if ($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
×
680
                if ($coord->end > 0 && $coord->start <= $feat->length) {
681
                    return 1;
682
                }
×
683
            }
684
        }
685
    }
686
    
47✔
687
    # we also need to check if the vf is in a frameshift intron within the cDNA
47✔
688

689
    if ($bvfoa->_intron_effects->{within_frameshift_intron}) {
47✔
690
        return within_transcript(@_); 
47✔
691
    }
47✔
692
    
47✔
693
    return 0;
694
}
695

47✔
696
sub _before_coding {
2✔
697
    my ($bvf, $tran) = @_;
698
    return 0 unless defined $tran->translation;
699
    
45✔
700
    my $bvf_s  = $bvf->{start};
701
    my $bvf_e  = $bvf->{end};
702
    my $t_s    = $tran->{start};
703
    my $cds_s  = $tran->coding_region_start;
47✔
704
    
47✔
705
    # we need to special case insertions just before the CDS start
706
    if ($bvf_s == $bvf_e+1 && $bvf_s == $cds_s) {
47✔
707
        return 1;
47✔
708
    }
47✔
709
   
47✔
710
    return overlap($bvf_s, $bvf_e, $t_s, $cds_s-1);    
711
}
712

47✔
713
sub _after_coding {
2✔
714
    my ($bvf, $tran) = @_;
715
    return 0 unless defined $tran->translation;
716
    
45✔
717
    my $bvf_s  = $bvf->{start};
718
    my $bvf_e  = $bvf->{end};
719
    my $t_e    = $tran->{end};
720
    my $cds_e  = $tran->coding_region_end;
47✔
721
    
47✔
722
    # we need to special case insertions just after the CDS end
47✔
723
    if ($bvf_s == $bvf_e+1 && $bvf_e == $cds_e) {
47✔
724
        return 1;
725
    }
47✔
726
    
727
    return overlap($bvf_s, $bvf_e, $cds_e+1, $t_e);
728
}
729

730
sub within_5_prime_utr {
47✔
731
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
732
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
733
    $feat ||= $bvfo->feature;
734
    $bvf  ||= $bvfo->base_variation_feature;
47✔
735

47✔
736
    my $five_prime_of_coding = 
47✔
737
        $feat->strand == 1 ? 
47✔
738
            _before_coding($bvf, $feat) : 
739
            _after_coding($bvf, $feat);
47✔
740
    
741
    return ( $five_prime_of_coding and within_cdna(@_) );
742
}
743

744
sub within_3_prime_utr {
47✔
745
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
746
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
747
    $feat ||= $bvfo->feature;
748
    $bvf  ||= $bvfo->base_variation_feature;
×
749
    
×
750
    my $three_prime_of_coding = 
×
751
        $feat->strand == 1 ? 
752
            _after_coding($bvf, $feat) : 
753
            _before_coding($bvf, $feat);
754
    
×
755
    return ( $three_prime_of_coding and within_cdna(@_) );
756
}
×
757

758
sub complex_indel {
×
759
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
760
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
761
    $bvf  ||= $bvfo->base_variation_feature;
762
   
2,963✔
763
    # pass the no_db flag to var_class to ensure we don't rely on the database for it 
764
    # as it may not have been set at this stage in the pipeline
765
    my $class = $bvf->var_class(1);
2,963✔
766

767
    return 0 unless $class =~ /insertion|deletion|indel/;
2,963✔
768

415✔
769
    return @{ $bvfo->cds_coords } > 1;
770
}
771

772
sub _get_peptide_alleles {
415✔
773
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
774

775
    # use cache for this method as it gets called a lot
776
    my $cache = $bvfoa->{_predicate_cache} ||= {};
777

381✔
778
    unless(exists($cache->{_get_peptide_alleles})) {
381✔
779
        my @alleles = ();
780

381✔
781
        #return () if frameshift(@_);
782

783
        if(
415✔
784
            defined $bvfoa &&
785
            (my $alt_pep = $bvfoa->peptide) &&        
786
            (my $ref_pep = _get_ref_pep(@_))
2,963✔
787
        ) {
788
            $ref_pep = '' if $ref_pep eq '-';
789
            $alt_pep = '' if $alt_pep eq '-';
790
        
381✔
791
            @alleles = ($ref_pep, $alt_pep);
381✔
792
        }
381✔
793

794
        $cache->{_get_peptide_alleles} = \@alleles;
795
    }
796

797
    return @{$cache->{_get_peptide_alleles}};
381✔
798
}
381✔
799

800
sub _get_ref_pep {
801
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
802
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
159✔
803
    my $ref_tva = $bvfo->get_reference_TranscriptVariationAllele;
804

159✔
805
    ## The shift hash has to be added to the reference tva before calculating the relevant peptide string.
806
    ## This is because we don't precalculate the shift_hash for reference tvas for speed and for
119✔
807
    ## variants with multiple alternate alleles with potentially different shift lengths.
808
    $ref_tva->{shift_hash} = $bvfoa->{shift_hash} if (defined($bvfoa->{shift_hash}));
119✔
809
    return $ref_tva->peptide;
810
}
44✔
811

44✔
812
sub _get_codon_alleles {
813
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
44✔
814
    
815
    return () if frameshift(@_);
44✔
816

44✔
817
    my $alt_codon = $bvfoa->codon;
818
    
44✔
819
    return () unless defined $alt_codon;
820

821
    $bvfo ||= $bvfoa->base_variation_feature_overlap;    
822
    my $ref_codon = $bvfo->get_reference_TranscriptVariationAllele->codon;
2✔
823
    
2✔
824
    return () unless defined $ref_codon;
825
    
2✔
826
    $ref_codon = '' if $ref_codon eq '-';
827
    $alt_codon = '' if $alt_codon eq '-';
2✔
828
    
829
    return ($ref_codon, $alt_codon);
2✔
830
}
2✔
831

832
sub _get_alleles {
2✔
833
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
834
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
2✔
835
    
2✔
836
    my $ref_tva = $bvfo->get_reference_VariationFeatureOverlapAllele;
837
    
2✔
838
    return () unless defined ($ref_tva);
839
    
840
    my $ref_allele = $ref_tva->variation_feature_seq;
841
    my $alt_allele = $bvfoa->variation_feature_seq;
801✔
842
    
843
    return () unless defined($ref_allele) and defined($alt_allele);
844
    
801✔
845
    $ref_allele = '' if $ref_allele eq '-';
846
    $alt_allele = '' if $alt_allele eq '-';
801✔
847
    
848
    return ($ref_allele, $alt_allele);
849
}
440✔
850

851
sub start_lost {
440✔
852
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
853

16✔
854
    # use cache for this method as it gets called a lot
16✔
855
    my $cache = $bvfoa->{_predicate_cache} ||= {};
16✔
856

857
    unless(exists($cache->{start_lost})) {
858

16✔
859
        # default
16✔
860
        $cache->{start_lost} = 0;
11✔
861

2✔
862
        return 0 unless _overlaps_start_codon(@_);
863

2✔
864
        $bvfo ||= $bvfoa->base_variation_feature_overlap;
865
        $feat ||= $bvfo->feature;
866
        $bvf  ||= $bvfo->base_variation_feature;
2✔
867
        
868
        # sequence variant
869
        if($bvfo->isa('Bio::EnsEMBL::Variation::TranscriptVariation')) {
870
            return $cache->{start_lost} = 1 if _ins_del_start_altered(@_) && !(inframe_insertion(@_) || inframe_deletion(@_));
871
            return $cache->{start_lost} = 1 if _inv_start_altered(@_);
872
            my ($ref_pep, $alt_pep) = _get_peptide_alleles(@_);
873
        
874
            return 0 unless $ref_pep;
875
            return 0 unless $alt_pep && $alt_pep ne 'X';
×
876
            
×
877
            # allow for introducing additional bases that retain start codon e.g. atg -> aCGAtg
878
            $cache->{start_lost} = (
×
879
                ($bvfo->translation_start == 1) and
×
880
                ($alt_pep !~ /\Q$ref_pep\E$/) and 
881
                ($alt_pep !~ /^\Q$ref_pep\E/)
882
            );
×
883
        }
884
        
885
        # structural variant
886
        elsif($bvfo->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariation')) {        
887
            my ($tr_crs, $tr_cre) = ($feat->coding_region_start, $feat->coding_region_end);
×
888
            return 0 unless defined($tr_crs) && defined($tr_cre);
889
            
890
            if($feat->strand == 1) {
891
                $cache->{start_lost} = overlap($tr_crs, $tr_crs + 2, $bvf->{start}, $bvf->{end});
363✔
892
            }
893
            else {
894
                $cache->{start_lost} = overlap($tr_cre-2, $tr_cre, $bvf->{start}, $bvf->{end});
895
            }
11✔
896
        }
897
        
11✔
898
        else {
11✔
899
            return 0;
11✔
900
        }
901
    }
11✔
902

11✔
903
    return $cache->{start_lost};
11✔
904
}
905

11✔
906
sub _inv_start_altered {
907
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;   
908
    # use cache for this method as it gets called a lot
11✔
909
    my $cache = $bvfoa->{_predicate_cache} ||= {};
11✔
910
    unless(exists($cache->{inv_start_altered})) {
911
        $cache->{inv_start_altered} = 0;
912

11✔
913
        return 0 if $bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele');
11✔
914
        return 0 unless $bvfoa->seq_is_unambiguous_dna();
11✔
915
        return 0 unless _overlaps_start_codon(@_);
10✔
916

10✔
917
        $bvfo ||= $bvfoa->base_variation_feature_overlap;
10✔
918

10✔
919
        # get cDNA coords
920
        my ($cdna_start, $cdna_end) = ($bvfo->cdna_start, $bvfo->cdna_end);
10✔
921
        return 0 unless $cdna_start && $cdna_end;
10✔
922

10✔
923
        # make and edit UTR + translateable seq
924
        my $translateable = $bvfo->_translateable_seq();
10✔
925
        my $utr = $bvfo->_five_prime_utr();
10✔
926
        return 0 unless $utr;
927
        my $utr_and_translateable = ($utr ? $utr->seq : '').$translateable;
10✔
928
        my $shifting_offset = defined($bvfoa->{shift_hash}) ? $bvfoa->{shift_hash}->{shift_length} : 0;
929
        $cdna_start += $shifting_offset;
930
        $cdna_end += $shifting_offset;
1✔
931

932
        return 0 if($cdna_end > length($utr_and_translateable));
933

934
        my $vf_feature_seq = $bvfoa->feature_seq;
386✔
935
        $vf_feature_seq = '' if $vf_feature_seq eq '-';
936
        my $atg_start = length($utr->seq);
386✔
937

386✔
938
        substr($utr_and_translateable, $cdna_start - 1, ($cdna_end - $cdna_start) + 1) = $vf_feature_seq;
939
        my $new_sc = substr($utr_and_translateable, $atg_start, 3);
386✔
940

941
        return $cache->{inv_start_altered} = 1 if $new_sc ne 'ATG';
385✔
942
    }
943

385✔
944
    return $cache->{inv_start_altered};
945
}
946

947
sub start_retained_variant {
603✔
948
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
949

603✔
950
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
951
    $bvf  ||= $bvfo->base_variation_feature;
603✔
952
    # structural variants don't have an allele string 
440✔
953
    return 0 if ($bvf->allele_string && ($bvf->allele_string eq 'COSMIC_MUTATION' || $bvf->allele_string eq 'HGMD_MUTATION'));
954

440✔
955
    my $pre = $bvfoa->_pre_consequence_predicates;
440✔
956

440✔
957
    return 0 unless _overlaps_start_codon(@_);
958
    if ($pre->{snp}) {
362✔
959
        return !_snp_start_altered(@_);
362✔
960
    } else { 
362✔
961
        return !_ins_del_start_altered(@_);
362✔
962
    }
362✔
963
}
964

357✔
965
sub _overlaps_start_codon {
966
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
967

968
    my $cache = $bvfoa->{_predicate_cache} ||= {};
969

970
    unless(exists($cache->{overlaps_start_codon})) {
520✔
971
        $cache->{overlaps_start_codon} = 0;
972

973
        $bvfo ||= $bvfoa->base_variation_feature_overlap;
974
        $feat ||= $bvfo->feature;
36✔
975
        return 0 if grep {$_->code eq 'cds_start_NF'} @{$feat->get_all_Attributes()};
976

977
        my ($cdna_start, $cdna_end) = ($bvfo->cdna_start_unshifted, $bvfo->cdna_end_unshifted);
36✔
978
        my $shifting_offset = defined($bvfoa->{shift_hash}) ? $bvfoa->{shift_hash}->{shift_length} : 0;
979
        $cdna_start += $shifting_offset;
36✔
980
        $cdna_end += $shifting_offset;
16✔
981
        return 0 unless $cdna_start && $cdna_end;
982
        
16✔
983
        $cache->{overlaps_start_codon} = overlap(
16✔
984
            $cdna_start, $cdna_end,
16✔
985
            $feat->cdna_coding_start, $feat->cdna_coding_start + 2
986
        );
16✔
987
    }
16✔
988

989
    return $cache->{overlaps_start_codon};
8✔
990
}
991

992
sub _snp_start_altered {
8✔
993
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
8✔
994

995
    my $cache = $bvfoa->{_predicate_cache} ||= {};
996

8✔
997
    unless(exists($cache->{snp_start_altered})) {
8✔
998
        $cache->{snp_start_altered} = 1;
8✔
999

1000
        return 0 if $bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele');
8✔
1001
        return 0 unless $bvfoa->seq_is_unambiguous_dna();
8✔
1002

1003
        $bvfo ||= $bvfoa->base_variation_feature_overlap;
8✔
1004

1005
        # get cDNA coords
1006
        my ($cdna_start, $cdna_end) = ($bvfo->cdna_start, $bvfo->cdna_end);
8✔
1007
        return 0 unless $cdna_start && $cdna_end;
1008

8✔
1009
        # make and edit UTR + translateable seq
1010
        my $translateable = $bvfo->_translateable_seq();
1011
        my $utr = $bvfo->_five_prime_utr();
28✔
1012
        my $utr_and_translateable = ($utr ? $utr->seq : '').$translateable;
1013

1014
        my $vf_feature_seq = $bvfoa->feature_seq;
1015
        $vf_feature_seq = '' if $vf_feature_seq eq '-';
311✔
1016

1017
        substr($utr_and_translateable, $cdna_start - 1, ($cdna_end - $cdna_start) + 1) = $vf_feature_seq;
311✔
1018
        
1019
        # get the aa now in place of prevoius start codon and see it is ATG
275✔
1020
        # it can happen for non-ATG variant (e.g - gene WT1, transcript ENST00000332351)
1021
        my $aa_replacing_start_codon = substr($utr_and_translateable, 0 - length($translateable), 3);
1022
        $cache->{snp_start_altered} = 0 if $aa_replacing_start_codon eq "ATG";
1023
    }
238✔
1024

1025
    return $cache->{snp_start_altered};
238✔
1026
}
1027

234✔
1028
sub _ins_del_start_altered {
231✔
1029
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
225✔
1030

224✔
1031
    # use cache for this method as it gets called a lot
1032
    my $cache = $bvfoa->{_predicate_cache} ||= {};
223✔
1033

1034
    unless(exists($cache->{ins_del_start_altered})) {
1035
        $cache->{ins_del_start_altered} = 0;
1036

66✔
1037
        return 0 if $bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele');
66✔
1038
        return 0 unless $bvfoa->seq_is_unambiguous_dna();
66✔
1039
        return 0 unless _overlaps_start_codon(@_);
1040

1041
        my $pre = $bvfoa->_pre_consequence_predicates;
66✔
1042
        return 0 unless $pre->{increase_length} || $pre->{decrease_length};
46✔
1043

1044
        $bvfo ||= $bvfoa->base_variation_feature_overlap;
46✔
1045

43✔
1046
        # get cDNA coords
12✔
1047
        my ($cdna_start, $cdna_end) = ($bvfo->cdna_start, $bvfo->cdna_end);
1048
        return 0 unless $cdna_start && $cdna_end;
12✔
1049
        
1050
        # make and edit UTR + translateable seq
12✔
1051
        my $translateable = $bvfo->_translateable_seq();
1052
        my $utr = $bvfo->_five_prime_utr();
1053
        my $utr_and_translateable = ($utr ? $utr->seq : '').$translateable;
1054

12✔
1055
        my $vf_feature_seq = $bvfoa->feature_seq;
1056
        $vf_feature_seq = '' if $vf_feature_seq eq '-';
1057

1058
        substr($utr_and_translateable, $cdna_start - 1, ($cdna_end - $cdna_start) + 1) = $vf_feature_seq;
1059

12✔
1060
        # check if still retain start
1061
        if ($utr) {
12✔
1062
            my $atg_start = length($utr->seq);
1063
            my $new_sc = substr($utr_and_translateable, $atg_start, 3);
1064
            my $new_utr = substr($utr_and_translateable, 0, length($utr->seq));
1065
            return $cache->{ins_del_start_altered} if ($new_utr eq $utr->seq && $new_sc eq 'ATG');
1066
        }
1067

1068
        # sequence shorter, we know it has been altered
1069
        return $cache->{ins_del_start_altered} = 1 if length($utr_and_translateable) < length($translateable);
20✔
1070

1071
        $cache->{ins_del_start_altered} = $translateable ne substr($utr_and_translateable, 0 - length($translateable));
1072
    }
×
1073

1074
    return $cache->{ins_del_start_altered};
×
1075
}
1076

×
1077
sub synonymous_variant {
1078
    my ($ref_pep, $alt_pep) = _get_peptide_alleles(@_);
1079
    
×
1080
    return 0 unless $ref_pep;
×
1081

1082
    return ( ($alt_pep eq $ref_pep) and (not stop_retained(@_) and not start_retained_variant(@_) and ($alt_pep !~ /X/) and ($ref_pep !~ /X/)) );
1083
}
1084

1085
sub missense_variant {
1086
    my ($ref_pep, $alt_pep) = _get_peptide_alleles(@_);
×
1087
    
1088
    return 0 unless defined $ref_pep;
1089
    
1090
    return 0 if start_lost(@_);
1091
    return 0 if stop_lost(@_);
×
1092
    return 0 if stop_gained(@_);
1093
    return 0 if partial_codon(@_);
1094
    return 0 if stop_retained(@_); # added because a variant can not be stop_retained and misense 
1095
    
1096
    # also need to check that the translated sequence is not the same i think . 
138✔
1097
    return ( $ref_pep ne $alt_pep ) && ( length($ref_pep) == length($alt_pep)  );
138✔
1098
}
138✔
1099

1100
sub inframe_insertion {
1101
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
138✔
1102
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
118✔
1103
    $bvf  ||= $bvfo->base_variation_feature;
1104

113✔
1105
    # sequence variant
1106
    if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
113✔
1107
        my ($ref_codon, $alt_codon) = _get_codon_alleles(@_);
29✔
1108

1109
        return 0 if start_lost(@_);
1110
        return 0 unless defined $ref_codon;
19✔
1111
        return 0 unless ( length($alt_codon) > length ($ref_codon) );
1112

1113
        my ($ref_pep, $alt_pep) = _get_peptide_alleles(@_);
8✔
1114

1115
        return 0 unless defined($ref_pep) && defined($alt_pep);
1116

1117
        # not an inframe insertion if the inserted AA is before the start codon
8✔
1118
        # we can use start_retained to check this
1119
        return 0 if start_retained_variant(@_) && $alt_pep =~ /\Q$ref_pep\E$/;
1120

1121
        # if we have a stop codon in the alt peptide
1122
        # trim off everything after it
1123
        # this allows us to detect inframe insertions that retain a stop
1124
        $alt_pep =~ s/\*.+/\*/;
20✔
1125

1126
        return 1 if ($alt_pep =~ /^\Q$ref_pep\E/) || ($alt_pep =~ /\Q$ref_pep\E$/);
12✔
1127

12✔
1128
    }
1129
    
1130
    # structural variant
1131
    elsif($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
1132

1133
        # TO BE DONE, NO WAY OF KNOWING WHAT SEQUENCE IS INSERTED YET
12✔
1134
        return 0;
1135
        
1136
        # must be an insertion
1137
        return 0 unless insertion(@_);
1138
        
1139
        my $cds_coords = $bvfo->cds_coords;
×
1140
        
1141
        if(scalar @$cds_coords == 1) {
1142
            
1143
            # wholly within exon
1144
            if($cds_coords->[0]->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
536✔
1145
                1;
1146
            }
1147
        }
536✔
1148
        
1149
        # variant partially overlaps
536✔
1150
        else {
311✔
1151
            return 0;
1152
        }
1153
    }
311✔
1154
    
1155
    else {
304✔
1156
        return 0;
1157
    }
304✔
1158
}
1159

273✔
1160
sub inframe_deletion {
1161
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1162
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
498✔
1163
    $bvf  ||= $bvfo->base_variation_feature;
1164
    
1165
    # sequence variant
1166
    if($bvf->isa('Bio::EnsEMBL::Variation::VariationFeature')) {
704✔
1167
        return 0 if partial_codon(@_);
1168

1169
        my ($ref_codon, $alt_codon) = _get_codon_alleles(@_);
704✔
1170
        my ($ref_pep, $alt_pep) = _get_peptide_alleles(@_);
1171
        return 0 unless defined $ref_codon;
704✔
1172
        return 0 unless length($alt_codon) < length ($ref_codon);
421✔
1173
        
1174
        # simple string match
421✔
1175
        return 1 if ($ref_codon =~ /^\Q$alt_codon\E/) || ($ref_codon =~ /\Q$alt_codon\E$/);
421✔
1176

421✔
1177
        # check for internal match
1178
        ($ref_codon, $alt_codon) = @{Bio::EnsEMBL::Variation::Utils::Sequence::trim_sequences($ref_codon, $alt_codon)};
1179
        
421✔
1180
        # if nothing remains of $alt_codon,
1181
        # then it fully matched a part in the middle of $ref_codon
1182
        return length($alt_codon) == 0 && length($ref_codon) % 3 == 0;
1183
    }
1184
    
1185
    # structural variant
1186
    elsif($bvf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
1187
        
413✔
1188
        # must be a deletion
1189
        return 0 unless deletion(@_);
413✔
1190
        
381✔
1191
        my $cds_coords = $bvfo->cds_coords;
1192
        my $exons      = $bvfo->_exons;
1193
        
32✔
1194
        # in exon
1195
        return (
1196
           scalar @$cds_coords == 1 and
1197
           $cds_coords->[0]->isa('Bio::EnsEMBL::Mapper::Coordinate') and
1198
           scalar grep {complete_within_feature($bvfoa, $_, $bvfo, $bvf)} @$exons and
1199
           $bvf->length() % 3 == 0
8✔
1200
        );
1201
    }
4✔
1202
    
4✔
1203
    else {
1204
        return 0;
4✔
1205
    }
×
1206
}
1207

1208
sub stop_gained {
4✔
1209
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1210

1211
    # use cache for this method as it gets called a lot
1212
    my $cache = $bvfoa->{_predicate_cache} ||= {};
1213

×
1214
    unless(exists($cache->{stop_gained})) {
1215
        $cache->{stop_gained} = 0;
1216
        
1217
        ## check for inframe insertion before stop 
700✔
1218
        return 0 if stop_retained(@_);
1219

1220
        my ($ref_pep, $alt_pep) = _get_peptide_alleles(@_);
1221
        
760✔
1222
        return 0 unless defined $ref_pep;
1223

1224
        $cache->{stop_gained} = ( ($alt_pep =~ /\*/) and ($ref_pep !~ /\*/) );
760✔
1225
    }
1226

760✔
1227
    return $cache->{stop_gained};
314✔
1228
}
314✔
1229

1230
sub stop_lost {
314✔
1231
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1232

313✔
1233
    # use cache for this method as it gets called a lot
1234
    my $cache = $bvfoa->{_predicate_cache} ||= {};
313✔
1235
    
1236
    unless(exists($cache->{stop_lost})) {
313✔
1237
        $cache->{stop_lost} = 0;
1238

313✔
1239
        $bvfo ||= $bvfoa->base_variation_feature_overlap;
1240
        $bvf  ||= $bvfo->base_variation_feature;
313✔
1241
        $feat ||= $bvfo->feature;
274✔
1242
        
1243
        # sequence variant
1244
        if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) {
7✔
1245
            
1246
            # special case frameshift
1247
    #        if(frameshift(@_)) {
1248
    #          my $ref_pep = _get_ref_pep(@_);
1249
    #          return $ref_pep && $ref_pep =~ /\*/;
6✔
1250
    #        }
1251
            
1252
            my ($ref_pep, $alt_pep) = _get_peptide_alleles(@_);
1✔
1253
            if(defined($ref_pep) && defined($alt_pep)) {
1254
                $cache->{stop_lost} = ( ($alt_pep !~ /\*/) and ($ref_pep =~ /\*/) );
1✔
1255
            }
1256
            else {
1257
                $cache->{stop_lost} = _ins_del_stop_altered(@_);
1258
            }
39✔
1259
        }
1260
        
1261
        # structural variant
1262
        elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) {
492✔
1263
            return 0 unless deletion(@_);
1264
            
1265
            my ($tr_crs, $tr_cre) = ($feat->coding_region_start, $feat->coding_region_end);
1266
            return 0 unless defined($tr_crs) && defined($tr_cre);
58✔
1267
            
1268
            if($feat->strand == 1) {
58✔
1269
                $cache->{stop_lost} = overlap($tr_cre-2, $tr_cre, $bvf->{start}, $bvf->{end});
1270
            }
58✔
1271
            else {
36✔
1272
                $cache->{stop_lost} = overlap($tr_crs, $tr_crs + 2, $bvf->{start}, $bvf->{end});
1273
            }
36✔
1274
        }
36✔
1275
        
36✔
1276
        else {
1277
            return 0;
22✔
1278
        }
22✔
1279
    }
1280

17✔
1281
    return $cache->{stop_lost};
1282
}
1283

1284
sub stop_retained {
1285
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1286

39✔
1287
    # use cache for this method as it gets called a lot
1288
    my $cache = $bvfoa->{_predicate_cache} ||= {};
1289

1290
    return 0 if partial_codon(@_);
37✔
1291
    return 0 if stop_lost(@_);
1292

1293
    unless(exists($cache->{stop_retained})) {
37✔
1294
        $bvfo ||= $bvfoa->base_variation_feature_overlap;
1295
        $bvf  ||= $bvfo->base_variation_feature;
37✔
1296
        # structural variants don't have an allele string
34✔
1297
        return 0 if ($bvf->allele_string && ($bvf->allele_string eq 'COSMIC_MUTATION' || $bvf->allele_string eq 'HGMD_MUTATION'));
1298

34✔
1299
        $cache->{stop_retained} = 0;
34✔
1300

24✔
1301
        $bvfo ||= $bvfoa->base_variation_feature_overlap;
1302

5✔
1303
        my $pre = $bvfoa->_pre_consequence_predicates;
5✔
1304

1305
        my ($ref_pep, $alt_pep) = _get_peptide_alleles(@_);
5✔
1306

1307
        if(defined($alt_pep) && $alt_pep ne '') {
1308
         
5✔
1309
          ## handle inframe insertion of a stop just before the stop (no ref peptide)
5✔
1310
          $cache->{stop_retained} = ref_eq_alt_sequence(@_);
1311
        }
1312
        else {
5✔
1313
            $cache->{stop_retained} = ($pre->{increase_length} || $pre->{decrease_length}) && _overlaps_stop_codon(@_) && !_ins_del_stop_altered(@_);
5✔
1314
        }
1315

5✔
1316
    }
1317
    
5✔
1318
    return $cache->{stop_retained};
5✔
1319
}
1320

1321
sub ref_eq_alt_sequence {
1322
   my ($bvfoa, $feat, $bvfo, $bvf) = @_; 
5✔
1323
   
1324
   $bvfo ||= $bvfoa->base_variation_feature_overlap;
1325
   my $ref_seq = $bvfo->_peptide;
5✔
1326

1327
   my $mut_seq = $ref_seq;
1328
   my $tl_start = $bvfo->translation_start;
1329
   my $tl_end = $bvfo->translation_end;
4✔
1330
   
1331
   my ($ref_pep, $alt_pep) = _get_peptide_alleles(@_);
1332

1333
   return 0 if $ref_pep eq "X" && $alt_pep eq "X"; # this is to account for incomplete coding terminal;
1334

1335
   # this is to account for synonymous variant if $ref_pep eq $alt_pep 
1336
   # as there is no resulting change to the amino acid sequence, it is not stop_retained
1337
   #return 0 if $ref_pep ne "*" && $alt_pep ne "*" && $ref_pep eq $alt_pep;
4✔
1338

1339
   # this is a logic from the former logic 
1340
   return 1 if ($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele') && defined($ref_seq) && $tl_start > length($ref_seq) && $alt_pep =~ /^\*/);
7✔
1341

1342
   substr($mut_seq, $tl_start-1, $tl_end - $tl_start + 1) = $alt_pep; # creating a mutated sequence from the ref sequence. 
1343

1344
   my $mut_substring = substr($mut_seq, 0, length($ref_seq)); # getting a substring up to the length of the ref sequence for comparison from index 0 to the length of the ref seq;
510✔
1345
   
510✔
1346
   my $final_stop = substr($mut_seq, length($ref_seq)) if length($ref_seq) < length($mut_seq); # getting the length of the $mut_seq from the length of the ref_seq to the end 
1347
   
1348
   my $final_stop_length = length($final_stop) if defined($final_stop) ne '';
510✔
1349
   
1350
   # 1 is if the ref_pep and the first letter of the alt_pep is the same and the alt_pep has * in it 
486✔
1351
   # 2 is the ref_seq eq $mut_substring and the final stop length is less than 3
1352
   # 3 is * in ref_pep and the same index position exists for both the ref and alt pep
481✔
1353
   return 1 if ( ($ref_pep eq substr($alt_pep, 0, 1) && $alt_pep =~ /\*/) ||
1354
       ($ref_seq eq $mut_substring && defined($final_stop_length) && $final_stop_length < 3) || ( $ref_pep =~ /\*/ && (index($ref_pep, "*") + 1 == index($alt_pep, "*") + 1) ));
361✔
1355
   return 0;
1356
}
361✔
1357

1358
sub _overlaps_stop_codon {
1359
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1360

361✔
1361
    my $cache = $bvfoa->{_predicate_cache} ||= {};
1362

349✔
1363
    unless(exists($cache->{overlaps_stop_codon})) {
1364
        $cache->{overlaps_stop_codon} = 0;
1365

1366
        $bvfo ||= $bvfoa->base_variation_feature_overlap;
1367
        $feat ||= $bvfo->feature;
24✔
1368
        return 0 if grep {$_->code eq 'cds_end_NF'} @{$feat->get_all_Attributes()};
1369

1370
        my ($cdna_start, $cdna_end) = ($bvfo->cdna_start, $bvfo->cdna_end);
1371
        return 0 unless $cdna_start && $cdna_end;
1372
        
1373
        $cache->{overlaps_stop_codon} = overlap(
1374
            $cdna_start, $cdna_end,
24✔
1375
            $feat->cdna_coding_end - 2, $feat->cdna_coding_end
1376
        );
1377
    }
1378

1379
    return $cache->{overlaps_stop_codon};
1380
}
1381

1382
sub _ins_del_stop_altered {
×
1383
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1384

1385
    # use cache for this method as it gets called a lot
1386
    my $cache = $bvfoa->{_predicate_cache} ||= {};
1387

1,139✔
1388
    unless(exists($cache->{ins_del_stop_altered})) {
1389
        $cache->{ins_del_stop_altered} = 0;
1390

1,139✔
1391
        return 0 if $bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele');
1392
        return 0 unless $bvfoa->seq_is_unambiguous_dna();
1,139✔
1393
        return 0 unless _overlaps_stop_codon(@_);
1394

1395
        my $pre = $bvfoa->_pre_consequence_predicates;
432✔
1396
        return 0 unless $pre->{increase_length} || $pre->{decrease_length};
1397

432✔
1398
        $bvfo ||= $bvfoa->base_variation_feature_overlap;
1399

432✔
1400
        # get cDNA coords and CDS start
1401
        my ($cdna_start, $cdna_end, $cds_start) = ($bvfo->cdna_start, $bvfo->cdna_end, $bvfo->cds_start);
419✔
1402
        return 0 unless $cdna_start && $cdna_end && $cds_start;
1403

419✔
1404
        # make and edit UTR + translateable seq
1405
        my $translateable = $bvfo->_translateable_seq();
419✔
1406
        my $utr = $bvfo->_three_prime_utr();
1407

419✔
1408
        my $utr_and_translateable = $translateable.($utr ? $utr->seq : '');
1409

1410
        my $vf_feature_seq = $bvfoa->feature_seq;
1,126✔
1411
        $vf_feature_seq = '' if $vf_feature_seq eq '-';
1412

1413
        # use CDS start to anchor the edit
1414
        # and cDNA coords to get the length (could use VF, but have already retrieved cDNA coords)
716✔
1415
        substr($utr_and_translateable, $cds_start - 1, ($cdna_end - $cdna_start) + 1) = $vf_feature_seq;
716✔
1416

716✔
1417
        # new sequence shorter, we know it has been altered
1418
        return $cache->{ins_del_stop_altered} = 1 if length($utr_and_translateable) < length($translateable);
716✔
1419

1420
        # now we need the codon from the new seq at the equivalent end pos from translateable
1421
        # and to translate it to check if it is still a stop
716✔
1422
        my $pep = Bio::Seq->new(
1423
            -seq        => substr($utr_and_translateable, length($translateable) - 3, 3),
692✔
1424
            -moltype    => 'dna',
1425
            -alphabet   => 'dna',
1426
        )->translate(
1427
            undef, undef, undef, $bvfo->_codon_table
1428
        )->seq;
1429
        $cache->{ins_del_stop_altered} = !($pep && $pep eq '*');
1430
    }
1431

1432
    return $cache->{ins_del_stop_altered};
1433
}
1434

1435
sub frameshift {
1436
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1437
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
1438

1439
    # sequence variant
1440
    if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) {
24✔
1441

1442
        return 0 if partial_codon(@_);
1443
        return 0 if stop_retained(@_);
1444
    
×
1445
        return 0 unless defined $bvfo->cds_start && defined $bvfo->cds_end;
1446
        
1447
        my $var_len = $bvfo->cds_end - $bvfo->cds_start + 1;
1448
    
1449
        my $allele_len = $bvfoa->seq_length;
1450
    
1451
        # if the allele length is undefined then we can't call a frameshift
15✔
1452
    
15✔
1453
        return 0 unless defined $allele_len;
1454
    
1455
        return abs( $allele_len - $var_len ) % 3;
1456
    }
1457
    
1458
    # structural variant
×
1459
    elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) {
×
1460
        my $exons = $bvfo->_exons;
1461
        
1462
        return (
1463
            (
1464
                deletion(@_) or
1465
                copy_number_loss(@_)
1466
            ) and
1467
            scalar grep {complete_within_feature($bvfoa, $_, $bvfo, $bvf)} @$exons and
1468
            $bvf->length % 3 != 0
1469
        );
1470
        
1471
        # TODO INSERTIONS
1472
    }
1473
    
1474
    else {
1475
        return 0;
1476
    }
1477
}
1478

1479
sub partial_codon {
1480
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1481

1482
    # use cache for this method as it gets called a lot
16✔
1483
    my $cache = $bvfoa->{_predicate_cache} ||= {};
16✔
1484

1485
    unless(exists($cache->{_partial_codon})) {
1486

1487
        # default
1488
        $cache->{_partial_codon} = 0;
1489

1490
        $bvfo ||= $bvfoa->base_variation_feature_overlap;
1491
   
1492
        return 0 unless defined $bvfo->translation_start;
1493

1494
        my $cds_length = length $bvfo->_translateable_seq;
1495

1496

1497
        my $codon_cds_start = ($bvfo->translation_start * 3) - 2;
1498

1499
        my $last_codon_length = $cds_length - ($codon_cds_start - 1);
×
1500
        
1501
        $cache->{_partial_codon} = ( $last_codon_length < 3 and $last_codon_length > 0 );
×
1502
    }
×
1503

1504
    return $cache->{_partial_codon};
×
1505
}
1506

1507
sub coding_unknown {
1508
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1509
    $bvfo ||= $bvfoa->base_variation_feature_overlap;
1510
    $bvf  ||= $bvfo->base_variation_feature;
1511

1512
    return 0 if complete_overlap_feature(@_);
1513

1514
    # sequence variant
1515
    if($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptVariationAllele')) {
1516
        return (
1517
            within_cds(@_) and (
1518
                (not $bvfoa->peptide) or
1519
                (not _get_peptide_alleles(@_)) or
1520
                ($bvfoa->peptide =~ /X/) or
1521
                ((_get_peptide_alleles(@_))[0] =~ /X/)
1522
            ) and (
1523
                not (
1524
                    frameshift(@_) or inframe_deletion(@_) or protein_altering_variant(@_) or
1525
                    start_retained_variant(@_) or start_lost(@_) or
1526
                    stop_retained(@_) or stop_lost(@_)
1527
                )
1528
            )
1529
        );
1530
    }
1531
    
1532
    # structural variant
1533
    elsif($bvfoa->isa('Bio::EnsEMBL::Variation::TranscriptStructuralVariationAllele')) {
1534
        return (within_cds(@_) and not(inframe_insertion(@_) or inframe_deletion(@_) or frameshift(@_)));
1535
    }
1536
    
1537
    else {
1538
        return 0;
1539
    }
1540
}
1541

1542
#package Bio::EnsEMBL::Variation::RegulatoryFeatureVariationAllele;
1543

1544
sub within_regulatory_feature {
1545
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1546
    return within_feature(@_);
1547
}
1548

1549
#package Bio::EnsEMBL::Variation::ExternalFeatureVariationAllele;
1550

1551
sub within_external_feature {
1552
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1553
    return (within_feature(@_) and (not within_miRNA_target_site(@_)));
1554
}
1555

1556
#sub within_miRNA_target_site {
1557
#    my $efva = shift;
1558
#
1559
#    my $fset = $efva->variation_feature_overlap->feature->feature_set;
1560
#
1561
#    return ($fset && $fset->name eq 'miRanda miRNA targets');
1562
#}
1563

1564
#package Bio::EnsEMBL::Variation::MotifFeatureVariationAllele;
1565

1566
#sub within_motif_feature {
1567
#    my $mfva = shift;
1568
#    return (
1569
#        within_feature($mfva) and
1570
#        !increased_binding_affinity($mfva) and
1571
#        !decreased_binding_affinity($mfva) 
1572
#    );
1573
#}
1574

1575
sub within_motif_feature {
1576
    my ($bvfoa, $feat, $bvfo, $bvf) = @_;
1577
    return within_feature(@_);
1578
}
1579

1580
#sub increased_binding_affinity {
1581
#    my $mfva = shift;
1582
#    my $change = $mfva->binding_affinity_change;
1583
#    return (within_feature($mfva) and (defined $change) and ($change > 0));
1584
#}
1585
#
1586
#sub decreased_binding_affinity {
1587
#    my $mfva = shift;
1588
#    my $change = $mfva->binding_affinity_change;
1589
#    return (within_feature($mfva) and (defined $change) and ($change < 0));
1590
#}
1591

1592
sub contains_entire_feature {
1593
    my $vfo = shift;
1594

1595
    my $bvf  = $vfo->base_variation_feature;
1596
    my $feat = $vfo->feature;
1597

1598
    return ( ($bvf->{start} <= $feat->{start}) && ($bvf->{end} >= $feat->{end}) ); 
1599
}
1600

1601
1;
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc