• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ensembl / ensembl-vep / 3246

pending completion
3246

push

travis-ci-com

web-flow
Merge pull request #1406 from nuno-agostinho/fix/large-hgvs

Use `--max_sv_size` as the HGVSg region limit

6 of 6 new or added lines in 1 file covered. (100.0%)

10520 of 10701 relevant lines covered (98.31%)

43958.88 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.55
/modules/Bio/EnsEMBL/VEP/Parser/HGVS.pm
1
=head1 LICENSE
2

3
Copyright [2016-2023] EMBL-European Bioinformatics Institute
4

5
Licensed under the Apache License, Version 2.0 (the "License");
6
you may not use this file except in compliance with the License.
7
You may obtain a copy of the License at
8

9
   http://www.apache.org/licenses/LICENSE-2.0
10

11
Unless required by applicable law or agreed to in writing, software
12
distributed under the License is distributed on an "AS IS" BASIS,
13
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
See the License for the specific language governing permissions and
15
limitations under the License.
16

17
=cut
18

19

20
=head1 CONTACT
21

22
 Please email comments or questions to the public Ensembl
23
 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
24

25
 Questions may also be sent to the Ensembl help desk at
26
 <http://www.ensembl.org/Help/Contact>.
27

28
=cut
29

30
# EnsEMBL module for Bio::EnsEMBL::VEP::Parser::HGVS
31
#
32
#
33

34
=head1 NAME
35

36
Bio::EnsEMBL::VEP::Parser::HGVS - HGVS list input parser
37

38
=head1 SYNOPSIS
39

40
my $parser = Bio::EnsEMBL::VEP::Parser::HGVS->new({
41
  config => $config,
42
  file   => 'hgvs.txt',
43
});
44

45
my $vf = $parser->next();
46

47
=head1 DESCRIPTION
48

49
HGVS format parser.
50

51
See http://varnomen.hgvs.org/ for spec.
52

53
Variants can be g. (genomic), c. (coding transcript),
54
n. (non-coding transcript) or p. (protein), though
55
since variants are transformed to genomic coordinates some p.
56
descriptions may fail to parse if they do not unambiguously resolve
57
to a genomic postion and ref/alt.
58

59
Requires a database connection to look up reference feature
60
locations, so not available in --offline mode.
61

62
=head1 METHODS
63

64
=cut
65

66

67
use strict;
220✔
68
use warnings;
220✔
69
no warnings 'recursion';
220✔
70

71
package Bio::EnsEMBL::VEP::Parser::HGVS;
72

73
use base qw(Bio::EnsEMBL::VEP::Parser);
220✔
74

75
use Bio::EnsEMBL::VEP::Utils qw(merge_arrays);
220✔
76
use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
220✔
77
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
220✔
78
use Bio::EnsEMBL::IO::ListBasedParser;
220✔
79

80

81
=head2 new
82

83
  Arg 1      : hashref $args
84
               {
85
                 config    => Bio::EnsEMBL::VEP::Config,
86
                 file      => string or filehandle,
87
               }
88
  Example    : $parser = Bio::EnsEMBL::VEP::Parser::HGVS->new($args);
89
  Description: Create a new Bio::EnsEMBL::VEP::Parser::HGVS object.
90
  Returntype : Bio::EnsEMBL::VEP::Parser::HGVS
91
  Exceptions : throws if offline mode (--offline) enabled
92
  Caller     : Runner
93
  Status     : Stable
94

95
=cut
96

97
sub new {
98
  my $caller = shift;
120✔
99
  my $class = ref($caller) || $caller;
120✔
100
  
101
  my $self = $class->SUPER::new(@_);
120✔
102

103
  # requires db connection
104
  throw("ERROR: Cannot use HGVS format in offline mode") if $self->param('offline');
120✔
105

106
  $self->add_shortcuts(['ambiguous_hgvs']);
116✔
107

108
  return $self;
116✔
109
}
110

111

112
=head2 parser
113

114
  Example    : $io_parser = $parser->parser();
115
  Description: Get ensembl-io parser object used to read data from input.
116
  Returntype : Bio::EnsEMBL::IO::ListBasedParser
117
  Exceptions : none
118
  Caller     : general
119
  Status     : Stable
120

121
=cut
122

123
sub parser {
124
  my $self = shift;
352✔
125
  return $self->{parser} ||= Bio::EnsEMBL::IO::ListBasedParser->open($self->file);
352✔
126
}
127

128

129
=head2 create_VariationFeatures
130

131
  Example    : $vfs = $parser->create_VariationFeatures();
132
  Description: Create a VariationFeature object from the current line
133
               of input. 
134
  Returntype : arrayref of Bio::EnsEMBL::VariationFeature
135
  Exceptions : warns if unable to parse HGVS string
136
  Caller     : next()
137
  Status     : Stable
138

139
=cut
140

141
sub create_VariationFeatures {
142
  my $self = shift;
176✔
143

144
  my $parser = $self->parser;
176✔
145
  $parser->next();
176✔
146

147
  $self->skip_empty_lines();
176✔
148

149
  return [] unless $parser->{record};
176✔
150

151
  $self->line_number($self->line_number + 1);
120✔
152

153
  my $hgvs = $parser->get_value;
120✔
154

155
  # remove whitespace
156
  $hgvs =~ s/\s+//g;
120✔
157

158
  if ($hgvs =~ /(\[|\])/ ){
120✔
159
    $self->warning_msg("WARNING: Unable to parse HGVS notation \'$hgvs\'\n");
×
160
    return $self->create_VariationFeatures 
×
161
  }
162

163
  my $param_core_group = $self->param('core_type');
120✔
164
  my @core_groups = sort {($b eq $param_core_group) cmp ($a eq $param_core_group)} qw(core otherfeatures);
120✔
165
  
166
  my $vfa = $self->get_adaptor('variation', 'VariationFeature');
120✔
167
  my $vfs = [];
120✔
168
  my @errors;
120✔
169
  my $max_size = $self->param('max_sv_size') || 5000;
120✔
170

171
  foreach my $core_group(@core_groups) {
120✔
172
    next if (($hgvs =~ /NM_/ || $hgvs =~ /XM_/) && $core_group eq 'core');
152✔
173
    my $sa  = $self->get_adaptor($core_group, 'Slice');
144✔
174
    my $ta  = $self->get_adaptor($core_group, 'Transcript');
144✔
175

176
    # not all hgvs notations are supported yet, so we have to wrap it in an eval
177
    eval {
144✔
178
      if($self->{ambiguous_hgvs}) {
144✔
179
        $vfs = $vfa->fetch_all_possible_by_hgvs_notation(
26✔
180
          -hgvs               => $hgvs,
181
          -slice_adaptor      => $sa,
182
          -transcript_adaptor => $ta,
183
          -replace_ref        => $self->{lookup_ref} || 0,
26✔
184
          -max_size           => $max_size,
185
        );
186
      }
187
      else {
188
        push @$vfs, $vfa->fetch_by_hgvs_notation(
46✔
189
          -hgvs               => $hgvs,
190
          -slice_adaptor      => $sa,
191
          -transcript_adaptor => $ta,
192
          -replace_ref        => $self->{lookup_ref} || 0,
46✔
193
          -max_size           => $max_size,
194
        );
195
      }
196
    };
197

198
    # only log unique errors
199
    merge_arrays(\@errors, [$@]) if $@ && length($@) > 1;
144✔
200

201
    last if @$vfs;
144✔
202
  }
203

204
  unless(@$vfs || scalar(@errors) == 0) {
120✔
205
    my %known_messages_hash = ("MSG: Region requested must be smaller than $max_size" => 0);
12✔
206
    
207
    my @grep_names = grep(/^MSG:/, split(/\n/, $errors[0]));
12✔
208
    my @error_message = @errors;
12✔
209
    if (exists( $known_messages_hash{$grep_names[0]})) {
12✔
210
      $grep_names[0] .= ". This value can be adjusted with --max_sv_size.";
×
211
      @error_message = @grep_names;
×
212
    }
213
    
214
    $self->warning_msg("WARNING: Unable to parse HGVS notation \'$hgvs\'\n".join("\n", @error_message));
12✔
215
    return $self->create_VariationFeatures;
12✔
216
  }
217

218
  # warn if this looks like a gene
219
  if($hgvs =~ /\:[cnp]\./ && $hgvs !~ /^(ENS|[NX][CGMRP]\_|LRG\_)/) {
108✔
220
    my $hgvs_ref = (split(':', $hgvs))[0];
12✔
221
    $self->warning_msg(
6✔
222
      "WARNING: Possible invalid use of gene or protein identifier '$hgvs_ref' as HGVS reference; ".
223
      (
224
        $self->{ambiguous_hgvs} ?
225
        "$hgvs may resolve to multiple genomic locations" :
6✔
226
        "most likely transcript will be selected"
227
      )
228
    );
229
  }
230

231
  foreach my $vf(@$vfs) {
108✔
232

233
    # transfer to whole chromosome slice
234
    $vf = $vf->transfer($vf->slice->seq_region_Slice);
148✔
235

236
    # name it after the HGVS
237
    $vf->{variation_name} = $hgvs;
148✔
238

239
    # add chr attrib
240
    $vf->{chr} = $vf->slice->seq_region_name;
148✔
241

242
    $vf->{_line} = [$hgvs];
148✔
243
  }
244

245
  # post_process_vfs will do lookup_ref again, we've already done it
246
  my $prev = $self->{lookup_ref} || 0;
108✔
247
  $self->{lookup_ref} = 0;
108✔
248
  my $return = $self->post_process_vfs($vfs);
108✔
249
  $self->{lookup_ref} = $prev;
108✔
250

251
  return $return;
108✔
252
}
253

254
1;
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc