• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ensembl / ensembl-vep / #610835618

02 Oct 2023 09:04AM UTC coverage: 98.187%. First build
#610835618

Pull #1470

travis-ci

Pull Request #1470: Custom annotations: summary statistics + filter by number of records

139 of 139 new or added lines in 9 files covered. (100.0%)

11320 of 11529 relevant lines covered (98.19%)

64059.97 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.54
/modules/Bio/EnsEMBL/VEP/AnnotationSourceAdaptor.pm
1
=head1 LICENSE
2

3
Copyright [2016-2025] EMBL-European Bioinformatics Institute
4

5
Licensed under the Apache License, Version 2.0 (the "License");
6
you may not use this file except in compliance with the License.
7
You may obtain a copy of the License at
8

9
     http://www.apache.org/licenses/LICENSE-2.0
10

11
Unless required by applicable law or agreed to in writing, software
12
distributed under the License is distributed on an "AS IS" BASIS,
13
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
See the License for the specific language governing permissions and
15
limitations under the License.
16

17
=cut
18

19

20
=head1 CONTACT
21

22
 Please email comments or questions to the public Ensembl
23
 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
24

25
 Questions may also be sent to the Ensembl help desk at
26
 <http://www.ensembl.org/Help/Contact>.
27

28
=cut
29

30
# EnsEMBL module for Bio::EnsEMBL::VEP::AnnotationSourceAdaptor
31
#
32
#
33

34
=head1 NAME
35

36
Bio::EnsEMBL::VEP::AnnotationSourceAdaptor - gets all AnnotationSources from initial config
37

38
=head1 SYNOPSIS
39

40
my $asa = Bio::EnsEMBL::VEP::AnnotationSourceAdaptor->new({
41
  config => $config
42
});
43

44
$sources = $asa->get_all();
45

46
=head1 DESCRIPTION
47

48
Factory for generating AnnotationSources from configuration in supplied Bio::EnsEMBL::VEP::Config.
49

50
Can create database- and file-based sources directly; uses Bio::EnsEMBL::VEP::CacheDir to generate
51
cache-based sources.
52

53
=head1 METHODS
54

55
=cut
56

57

58
use strict;
132✔
59
use warnings;
132✔
60

61
package Bio::EnsEMBL::VEP::AnnotationSourceAdaptor;
62

63
use base qw(Bio::EnsEMBL::VEP::BaseVEP);
132✔
64

65
use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
132✔
66
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
132✔
67
use Bio::EnsEMBL::VEP::CacheDir;
132✔
68
use Bio::EnsEMBL::VEP::AnnotationSource::Database::RegFeat;
132✔
69
use Bio::EnsEMBL::VEP::AnnotationSource::Database::Variation;
132✔
70
use Bio::EnsEMBL::VEP::AnnotationSource::Database::StructuralVariation;
132✔
71
use Bio::EnsEMBL::VEP::AnnotationSource::File;
132✔
72

73
use LWP::Simple;
132✔
74

75
=head2 get_all

  Example    : $sources = $asa->get_all()
  Description: Collects the cache, database and custom AnnotationSources (in
               that order) and returns them sorted so that sources with a
               true can_filter_vfs entry come before the others.
  Returntype : arrayref of Bio::EnsEMBL::VEP::AnnotationSource
  Exceptions : none thrown here; anything raised by the three collector
               methods propagates to the caller
  Caller     : Bio::EnsEMBL::VEP::BaseRunner
  Status     : Stable

=cut

sub get_all {
  my $self = shift;

  # gather every configured source: cache first, then database, then custom
  my @collectors = qw(get_all_from_cache get_all_from_database get_all_custom);
  my @sources;
  foreach my $collector (@collectors) {
    push @sources, @{ $self->$collector() };
  }

  # descending numeric sort on the can_filter_vfs flag (missing flag counts as 0)
  my @ordered = sort {
    ($b->{can_filter_vfs} || 0) <=> ($a->{can_filter_vfs} || 0)
  } @sources;

  return \@ordered;
}
98

99

100
=head2 get_all_from_cache

  Example    : $sources = $asa->get_all_from_cache()
  Description: Gets all cache AnnotationSources. Returns an empty arrayref
               when the "cache" parameter is not set; otherwise builds a
               Bio::EnsEMBL::VEP::CacheDir from the configured directories
               and returns whatever its get_all_AnnotationSources() yields.
  Returntype : arrayref of Bio::EnsEMBL::VEP::AnnotationSource
  Exceptions : none thrown here
  Caller     : get_all()
  Status     : Stable

=cut

sub get_all_from_cache {
  my $self = shift;

  # nothing to do when the cache is not enabled
  if(!$self->param('cache')) {
    return [];
  }

  # dir_cache takes precedence over dir as the cache root
  my %cache_dir_args = (
    config   => $self->config,
    root_dir => $self->param('dir_cache') || $self->param('dir'),
    dir      => $self->param('full_cache_dir')
  );

  return Bio::EnsEMBL::VEP::CacheDir->new(\%cache_dir_args)->get_all_AnnotationSources();
}
124

125

126
=head2 get_all_from_database

  Example    : $sources = $asa->get_all_from_database()
  Description: Gets all database AnnotationSources. Returns an empty arrayref
               when the "offline" parameter is set. Transcript, regulatory
               feature and known-variant sources are only created when no
               cache is in use (and not for a custom-only run without
               "database"); the structural variation source has no cache
               equivalent and is considered independently of that.
  Returntype : arrayref of Bio::EnsEMBL::VEP::AnnotationSource
  Exceptions : throws if a Transcript source is requested but its module
               could not be loaded; rethrows any error raised while
               building the merged (otherfeatures) source
  Caller     : get_all()
  Status     : Stable

=cut

sub get_all_from_database {
  my $self = shift;

  return [] if $self->param('offline');

  my @as;

  # we don't want to get e.g. transcript DB sources if we have cache
  unless($self->param('cache') || ($self->param('custom') && !$self->param('database'))) {
    my $module = $self->module_prefix.'::AnnotationSource::Database::Transcript';

    # record whether the module loaded; the outcome only matters if a
    # Transcript source is actually requested below
    my $loaded     = eval "require $module; 1";
    my $load_error = $@;

    if($self->param('database')) {

      # fail with a clear message instead of an obscure "Can't locate object
      # method" error; a package that already provides new() is still accepted
      throw("ERROR: Failed to load $module: $load_error\n")
        unless $loaded || $module->can('new');

      push @as, $module->new({
        config => $self->config,
        filter => $self->param('transcript_filter'),
        bam    => $self->param('bam'),
      });

      # special case merged
      if($self->param('merged') && $self->get_adaptor('otherfeatures', 'slice')) {
        my $core_type_bak = $self->param('core_type');
        $self->param('core_type', 'otherfeatures');

        # build the source inside eval so that core_type is always restored,
        # even if the constructor dies
        my @merged_sources;
        my $built = eval {
          @merged_sources = $module->new({
            config => $self->config,
            filter => $self->param('transcript_filter'),
            bam    => $self->param('bam'),
          });
          1;
        };
        my $build_error = $@;

        $self->param('core_type', $core_type_bak);

        die $build_error unless $built;
        push @as, @merged_sources;
      }
    }

    push @as, Bio::EnsEMBL::VEP::AnnotationSource::Database::RegFeat->new({
      config => $self->config,
    }) if $self->param('regulatory') && $self->get_adaptor('funcgen', 'RegulatoryFeature');

    push @as, Bio::EnsEMBL::VEP::AnnotationSource::Database::Variation->new({
      config => $self->config,
    }) if $self->param('check_existing') && $self->get_adaptor('variation', 'Variation');
  }

  # overlapping SVs
  # this has no cache equivalent
  push @as, Bio::EnsEMBL::VEP::AnnotationSource::Database::StructuralVariation->new({
    config => $self->config,
  }) if $self->param('check_svs') && $self->get_adaptor('variation', 'Variation');

  return \@as;
}
188

189

190
=head2 get_all_custom

  Example    : $sources = $asa->get_all_custom()
  Description: Gets all custom file AnnotationSources, one per entry of the
               "custom" parameter. Each entry is either a comma-separated
               list of key=value options (file, format, short_name, fields,
               type, overlap_cutoff, reciprocal, distance, coords, same_type,
               num_records, summary_stats, gff_type) or, when it contains no
               "=", the positional form
               file,short_name,format,type,coords[,field...].
               A file name containing ###CHR### is expanded once per known
               chromosome and only the files that exist are used.
  Returntype : arrayref of Bio::EnsEMBL::VEP::AnnotationSource
  Exceptions : throws on unsupported options, a missing file or format, a
               remote file when "no_remote" is set, or unsupported summary
               statistics; dies if a key=value parameter cannot be parsed or
               if no file matches a ###CHR### pattern
  Caller     : get_all()
  Status     : Stable

=cut

sub get_all_custom {
  my $self = shift;

  my @as;

  # options accepted in the key=value form of --custom
  my %is_valid_option = map { $_ => 1 } qw(
    file format short_name fields type overlap_cutoff reciprocal
    distance coords same_type num_records summary_stats gff_type
  );

  # statistics that may be requested through summary_stats
  my @stats = ('min', 'max', 'mean', 'count', 'sum');

  foreach my $custom_string(@{$self->param('custom') || []}) {

    my %hash = ();
    my @fields;

    if ($custom_string =~ /=/) {

      # key=value form
      foreach my $param (split /\,/, $custom_string) {
        # limit the split to two fields so that a value may itself contain
        # "=" (e.g. a URL with a query string); empty values are still rejected
        my ($key, $val) = split('=', $param, 2);
        die("ERROR: Failed to parse parameter $param; Please add <VALUE_OF_PARAMETER>=$param\n")
          unless defined($key) && defined($val) && $val ne '';
        $hash{$key} = $val;
      }

      # reject invalid options (sorted so the message is deterministic)
      my @invalid_opts = sort grep { !$is_valid_option{$_} } keys %hash;
      throw("ERROR: The following options are not supported for custom annotations: "
            . join(", ", @invalid_opts) . "\n" .
            "LINE: --custom $custom_string\n") if @invalid_opts;
    } else {

      # positional form; any trailing items are field names
      ($hash{"file"}, $hash{"short_name"}, $hash{"format"}, $hash{"type"}, $hash{"coords"}, @fields) = split /\,/, $custom_string;
    }

    throw("ERROR: No file was added for custom annotation source.\nLINE: --custom $custom_string\n") unless defined($hash{"file"});
    throw("ERROR: No format specified for custom annotation source.\nLINE: --custom $custom_string\n") unless defined($hash{"format"});

    # http, https and ftp locations all count as remote
    throw("ERROR: Access to remote data files disabled\n")
      if $self->param('no_remote') && $hash{"file"} =~ m{^(?:https?|ftp)://.+}i;

    my $opts = {
      config => $self->config,
      file => $hash{"file"},
      short_name => $hash{"short_name"},
      format => $hash{"format"},
      type => $hash{"type"} || "overlap",
      report_coords => $hash{"coords"} || 0,
      overlap_cutoff => $hash{"overlap_cutoff"} || 0,
      distance => $hash{"distance"},
      same_type => $hash{"same_type"} || 0,
      reciprocal => $hash{"reciprocal"} || 0,
      num_records => $hash{"num_records"},
      gff_type => $hash{"gff_type"} || "transcript"
    };

    $opts->{overlap_def} = $opts->{reciprocal} ?
      "Percentage of minimum reciprocal overlap between input variant and reference variant" :
      "Percentage of input variant covered by reference variant";

    # GTF/GFF sources additionally receive the transcript filter and BAM settings
    my $format = $hash{"format"};
    if(defined($format) && $format =~ /^G[TF]F$/i) {
      $opts->{filter} = $self->param('transcript_filter');
      $opts->{bam} = $self->param('bam');
    }

    # fields given positionally take precedence over a fields= option
    $opts->{fields} = [split /%/, $hash{"fields"}] if $hash{"fields"};
    $opts->{fields} = \@fields if @fields;

    if (!defined $opts->{num_records}) {
      $opts->{num_records} = 50; # default when num_records is not given
    } elsif ($opts->{num_records} eq 'all') {
      $opts->{num_records} = 'inf';
    }

    # summary statistics are only set when requested; a missing value or
    # "none" leaves the key out of the options altogether
    $opts->{summary_stats} = $hash{"summary_stats"} || 'none';
    delete $opts->{summary_stats} if $opts->{summary_stats} eq 'none';

    if ( $opts->{summary_stats} ) {
      $opts->{summary_stats} = [split /%/, $opts->{summary_stats}];

      # Check invalid summary statistics
      my @invalid;
      for my $k (@{ $opts->{summary_stats} }) {
        push @invalid, $k unless grep { $_ eq $k } @stats;
      }

      if (@invalid) {
        my $invalid_opts = join(", ", @invalid);
        my $valid_opts   = join(", ", @stats);
        throw("ERROR: The following summary statistics for custom annotations ".
              "are not supported: $invalid_opts. ".
              "Available options are: $valid_opts.\n".
              "LINE: --custom $custom_string\n");
      }
    }

    if ($hash{"file"} =~ /\#\#\#CHR\#\#\#/) {

      # use the known chromosome names, or a human-style default set
      my @known_chromosomes = sort keys %{ $self->chr_lengths || {} };
      my @valid_chromosomes = @known_chromosomes ? @known_chromosomes : ((1..22), qw(X Y MT));

      # count matches for this pattern only; sources added for earlier
      # --custom entries must not hide a pattern that matches nothing
      my $matched = 0;

      foreach my $chr (@valid_chromosomes){
        my $new_file = $hash{"file"};
        $new_file =~ s/\#\#\#CHR\#\#\#/$chr/;

        # keep the file if it exists locally or head() succeeds for it
        next unless ( -e $new_file || head($new_file) );

        push @as, Bio::EnsEMBL::VEP::AnnotationSource::File->new({ %$opts, file => $new_file });
        $matched++;
      }

      # Non-match ###CHR### pattern scenario
      die "Error: No files with pattern " . $hash{"file"} . " were found\n" unless $matched;

    } else {
      push @as, Bio::EnsEMBL::VEP::AnnotationSource::File->new($opts);
    }
  }

  return \@as;
}
335

336
1;
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc