• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ensembl / ensembl-variation / 5104

pending completion
5104

push

travis-ci-com

web-flow
Merge pull request #922 from nakib103/update_gerp

Update gerp in VCF config

22251 of 26761 relevant lines covered (83.15%)

1446.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

6.62
/modules/Bio/EnsEMBL/Variation/Pipeline/TranscriptEffect.pm
1
=head1 LICENSE
2

3
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4
Copyright [2016-2022] EMBL-European Bioinformatics Institute
5

6
Licensed under the Apache License, Version 2.0 (the "License");
7
you may not use this file except in compliance with the License.
8
You may obtain a copy of the License at
9

10
   http://www.apache.org/licenses/LICENSE-2.0
11

12
Unless required by applicable law or agreed to in writing, software
13
distributed under the License is distributed on an "AS IS" BASIS,
14
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
See the License for the specific language governing permissions and
16
limitations under the License.
17

18
=cut
19

20

21
=head1 CONTACT
22

23
 Please email comments or questions to the public Ensembl
24
 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
25

26
 Questions may also be sent to the Ensembl help desk at
27
 <http://www.ensembl.org/Help/Contact>.
28

29
=cut
30

31
package Bio::EnsEMBL::Variation::Pipeline::TranscriptEffect;
32

33
use strict;
1✔
34
use warnings;
1✔
35

36
use Bio::EnsEMBL::Variation::TranscriptVariation;
1✔
37
use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(MAX_DISTANCE_FROM_TRANSCRIPT overlap);
1✔
38
use Bio::EnsEMBL::Variation::Utils::FastaSequence qw(setup_fasta);
1✔
39

40
use FileHandle;
1✔
41
use ImportUtils qw(load);
1✔
42
use Fcntl qw(:flock SEEK_END);
1✔
43

44
use base qw(Bio::EnsEMBL::Variation::Pipeline::BaseVariationProcess);
1✔
45

46
my $DEBUG   = 0;
47

48
sub run {
  my $self = shift;

  ## Pipeline parameters controlling how consequences are computed and stored.
  my $disambiguate_sn_alleles = $self->param('disambiguate_single_nucleotide_alleles');
  my $mtmp                    = $self->param('mtmp_table');
  my $max_distance            = $self->param('max_distance');
  my $by_transcript           = ($self->param('analysis') eq 'by_transcript') ? 1 : 0;
  my $prevent_shifting        = $self->param('prevent_shifting');

  my $variations_to_include;
  # if (my $vars = $self->param('variations_to_include')) {
  #   # turn the list of variation names into a hash to speed up checking
  #   $variations_to_include = { map { $_ => 1 } @$vars };
  # }

  # clear the registry here
  # this hopefully prevents any sequence caching issues
  # overhanging from previous jobs executed in the same hive process
  Bio::EnsEMBL::Registry->clear();

  my $core_dba = $self->get_species_adaptor('core');
  $core_dba->dbc->reconnect_when_lost(1);

  my $var_dba = $self->get_species_adaptor('variation');
  $var_dba->dbc->reconnect_when_lost(1);

  my $dbc = $var_dba->dbc();

  my $sa = $core_dba->get_SliceAdaptor;

  my $tva = $var_dba->get_TranscriptVariationAdaptor;
  $tva->db->include_failed_variations(1);

  if ((my $fasta = $self->param('fasta')) && !$Bio::EnsEMBL::Slice::_fasta_redefined && !$Bio::EnsEMBL::Slice::fasta_db) {
    # we need to find the assembly version to tell it about PARs
    my ($highest_cs) = @{$core_dba->get_CoordSystemAdaptor->fetch_all()};
    my $assembly = $highest_cs->version();
    setup_fasta(-FASTA => $fasta, -ASSEMBLY => $assembly);
  }
  else {
    # set seq cache size higher
    # this prevents repeated DB lookups for sequence for HGVS e.g. in long introns
    # to do this we fetch a sequence adaptor
    my $seq_ad = $core_dba->get_SequenceAdaptor;
    # then reset its cache
    # the first param passed to this method is the "chunk power"
    # essentially the length in bp of cached sequence will be 2 ^ chunk_power
    $seq_ad->_init_seq_instance($seq_ad->chunk_power + 2);
  }

  my $slice;
  my $stable_id;
  my @transcripts = ();

  if ($by_transcript) {
    # one job per transcript: fetch it and a padded slice around it
    my $transcript_stable_id = $self->param('transcript_stable_id');
    $stable_id = $transcript_stable_id;
    my $ta = $core_dba->get_TranscriptAdaptor;
    my $transcript = $ta->fetch_by_stable_id($transcript_stable_id)
      or die "failed to fetch transcript for stable id: $transcript_stable_id";
    $slice = $sa->fetch_by_transcript_stable_id(
      $transcript_stable_id,
      $max_distance
    ) or die "failed to get slice around transcript: $transcript_stable_id";
    # call seq here to help cache
    $slice->seq();
    $transcript = $transcript->transfer($slice);
    push @transcripts, $transcript;
  } else {
    # one job per gene: process all of its transcripts
    my $gene_stable_id = $self->param('gene_stable_id');
    $stable_id = $gene_stable_id;
    my $ga = $core_dba->get_GeneAdaptor;
    my $gene = $ga->fetch_by_stable_id($gene_stable_id)
      or die "failed to fetch gene for stable id: $gene_stable_id";
    $slice = $sa->fetch_by_gene_stable_id(
      $gene_stable_id,
      $max_distance
    ) or die "failed to get slice around gene: $gene_stable_id";
    # call seq here to help cache
    $slice->seq();
    $gene = $gene->transfer($slice);
    push @transcripts, @{ $gene->get_all_Transcripts };
  }

  my $vfa = $var_dba->get_VariationFeatureAdaptor();
  my @vfs = (
    @{ $vfa->fetch_all_by_Slice_SO_terms($slice) },
    @{ $vfa->fetch_all_somatic_by_Slice_SO_terms($slice) }
  );
  my $web_index_files_dir = $self->get_files_dir($stable_id, 'web_index');

  # get a fh for the hgvs file too
  my $hgvs_fh = FileHandle->new();
  $hgvs_fh->open(">" . $web_index_files_dir . "/$stable_id\_variation_hgvs.txt") or die "Cannot open dump file " . $web_index_files_dir . "/$stable_id\_variation_hgvs.txt: $!";

  my $hgvs_by_var = {};
  my $var_count = 0;

  my $files = $self->get_dump_files($stable_id, $tva);

  # transcripts of these biotypes are excluded from the MTMP table
  my %biotypes_to_skip = (
    'lncRNA' => 1,
    'processed_pseudogene' => 1,
    'unprocessed_pseudogene' => 1,
  );

  # Species check is loop-invariant, so compute it once here rather than
  # once per variation feature (was recomputed inside the inner loop).
  my $species  = $self->param('species');
  my $is_human = (defined $species && $species =~ /homo_sapiens|human/) ? 1 : 0;

  for my $transcript (@transcripts) {

    my $biotype = $transcript->biotype;
    my $is_mane = $transcript->is_mane();
    # NB: deliberately shadows the outer gene/transcript $stable_id — the
    # delete-by-feature SQL below must use the transcript's own stable id
    my $stable_id = $transcript->stable_id;
    my @vf_ids;

    for my $vf (@vfs) {

      if (defined $variations_to_include) {
        next unless $variations_to_include->{$vf->variation_name};
      }

      next unless overlap($vf->start, $vf->end, $transcript->start - $max_distance, $transcript->end + $max_distance);

      my $tv = Bio::EnsEMBL::Variation::TranscriptVariation->new(
        -transcript     => $transcript,
        -variation_feature  => $vf,
        -adaptor      => $tva,
        -disambiguate_single_nucleotide_alleles => $disambiguate_sn_alleles,
        -no_transfer    => 1,
        -no_shift        => $prevent_shifting,
      );

      # if the variation has no effect on the transcript $tv will be undef
      if ($tv) {

        next if (!scalar(@{ $tv->consequence_type }) && ($tv->distance_to_transcript > $max_distance));

        push @vf_ids, $vf->dbID();

        # store now or save to store later? Uncomment out the behaviour you want
        # save to store later uses more memory but means you don't have to sort human TV after the run

        ## BEHAVIOUR 1: store to DB now
        ## comment: slow as MySQL does INSERT DELAYED
        # $tva->store($tv, $mtmp);
        ## end block

        ## BEHAVIOUR 2: store to memory
        ## comment: seems like MySQL doesn't work very quickly using table locks required and also still does one INSERT per row
        # push @write_data, @{$tva->_get_write_data($tv)};
        ## end block

        ## BEHAVIOUR 3: store to tmp file
        ## comment: seems to be the fastest as it uses LOAD DATA which inherently locks anyway and does only one MySQL command per gene
        my $data = $tva->_get_write_data($tv);
        my $tv_fh = $files->{transcript_variation}->{fh};
        print $tv_fh join("\t", map {defined($_) ? $_ : '\N'} @$_)."\n" for @$data;

        if ($mtmp) {

          my $mtmp_data = $tva->_get_mtmp_write_data_from_tv_write_data($data);
          my $mtmp_fh = $files->{MTMP_transcript_variation}->{fh};

          # MANE is only available for human: for human write only MANE
          # transcripts; for all other species write everything
          my $write_mtmp = ($is_human && $is_mane) || !$is_human ? 1 : 0;

          unless ($biotypes_to_skip{$biotype} || !$write_mtmp) {
            print $mtmp_fh join("\t", map {defined($_) ? $_ : '\N'} @$_)."\n" for @$mtmp_data;
          }
        }
        ## end block

        ## populate tables for website index building
        my $var_id = $vf->get_Variation_dbID();

        for my $allele (@{ $tv->get_all_alternate_TranscriptVariationAlleles }) {

          next unless defined $allele->hgvs_transcript();
          my $hgvs_transcript = (split /\:/, $allele->hgvs_transcript())[1];
          $hgvs_by_var->{$var_id}->{$hgvs_transcript} = 1;

          next unless defined $allele->hgvs_protein();
          my $hgvs_protein = (split /\:/, $allele->hgvs_protein())[1];
          $hgvs_by_var->{$var_id}->{$hgvs_protein} = 1 if defined $hgvs_protein && $hgvs_protein =~ /^p/; ## don't store synonymous
        }

        # dump out these hashes periodically to stop memory exploding
        # will lead to more duplicates in the output file but better that than job failing
        if (++$var_count > 100000) {
          $self->dump_hgvs_var($hgvs_by_var, $hgvs_fh);
          $var_count = 0;
          $hgvs_by_var = {};
        }
      }
    }

    # Delete rows superseded by this run when doing an incremental update.
    # Guard against an undef param before the -e file test (avoids an
    # "uninitialized value" warning when update_diff is not set).
    if ($self->param('update_diff') && -e $self->param('update_diff')) {
      my @chunk_delete;

      # delete in chunks of 500 ids to keep the IN () lists bounded
      push @chunk_delete, [ splice @vf_ids, 0, 500 ] while @vf_ids;

      for my $chunk (@chunk_delete) {
        # fixed: was `next if $joined_vf_ids == ""` — a numeric comparison
        # against a string, which warns and only skips by accident
        next unless @$chunk;
        my $joined_vf_ids = join(',', @$chunk);
        $dbc->do(qq{
                  DELETE FROM  transcript_variation
                  WHERE   variation_feature_id IN ($joined_vf_ids)
                  AND     feature_stable_id = "$stable_id"
        });

        $dbc->do(qq{
                  DELETE FROM  MTMP_transcript_variation
                  WHERE   variation_feature_id IN ($joined_vf_ids)
                  AND     feature_stable_id = "$stable_id"
        }) if ($mtmp);
      }
    }
  }

  ## uncomment this if using BEHAVIOUR 2 above
  # if(@write_data) {
  #   $var_dba->dbc->do("LOCK TABLES transcript_variation WRITE");
  #   $tva->_store_write_data(\@write_data, 1);
  #   $var_dba->dbc->do("UNLOCK TABLES");
  #
  #   if($mtmp) {
  #     $var_dba->dbc->do("LOCK TABLES MTMP_transcript_variation WRITE");
  #     $tva->_store_mtmp_write_data($tva->_get_mtmp_write_data_from_tv_write_data(\@write_data), 1) if $mtmp;
  #     $var_dba->dbc->do("UNLOCK TABLES");
  #   }
  # }
  ## end block

  ## uncomment this if using BEHAVIOUR 3 above
  ## LOADING HAS BEEN MOVED TO FinishTranscriptEffect.pm if run in by_transcript analysis mode
  my $tmpdir = $self->get_files_dir($stable_id, 'transcript_effect');
  foreach my $table (keys %$files) {
    $files->{$table}->{fh}->close();
    if (!$by_transcript) {
      $ImportUtils::TMP_DIR  = $tmpdir;
      # NOTE(review): the hash key here was garbled in the source; `filename`
      # matches the key populated by get_dump_files — confirm against upstream
      $ImportUtils::TMP_FILE = $files->{$table}->{filename};
      load($var_dba->dbc, ($table, @{$files->{$table}->{cols}}));
    }
  }
  ## end block

  # dump HGVS stubs to file for web index
  $self->dump_hgvs_var($hgvs_by_var, $hgvs_fh);

  $hgvs_fh->close();

  print STDERR "All done\n" if $DEBUG;

  return;
}
308

309
# No dataflow is passed downstream: run() writes its results to dump files
# and the database directly, so this hive hook is intentionally a no-op.
sub write_output {
  my ($self) = @_;
  return;
}
313

314
# Write accumulated HGVS notations to the web-index file handle.
# $hgvs_by_var maps variation dbID => { hgvs_string => 1, ... }; each
# (dbID, hgvs) pair is emitted as one tab-separated line. Does nothing
# when the hash ref is false/undef.
sub dump_hgvs_var {
  my ($self, $hgvs_by_var, $fh) = @_;

  return unless $hgvs_by_var;

  foreach my $var_id (keys %$hgvs_by_var) {
    foreach my $hgvs (keys %{ $hgvs_by_var->{$var_id} }) {
      print $fh "$var_id\t$hgvs\n";
    }
  }
}
323

324
# Create the per-gene/per-transcript dump files used by run()'s BEHAVIOUR 3
# (LOAD DATA) path.
#
# Arguments:
#   $stable_id - gene or transcript stable id, used in the file names
#   $tva       - TranscriptVariationAdaptor, provides the column lists
#
# Returns a hash ref keyed by table name (transcript_variation,
# MTMP_transcript_variation); each entry holds:
#   cols     - array ref of column names for ImportUtils::load
#   filename - bare dump file name
#   filepath - full path of the dump file in the tmp dir
#   fh       - an open write FileHandle for the dump file
sub get_dump_files {
  my ($self, $stable_id, $tva) = @_;

  # initialise a hash of files
  my $files = {
    transcript_variation      => { 'cols' => [$tva->_write_columns],      },
    MTMP_transcript_variation => { 'cols' => [$tva->_mtmp_write_columns], },
  };

  my $tmpdir = $self->get_files_dir($stable_id, 'transcript_effect');

  # create file handles
  for my $table (keys %$files) {
    my $hash = $files->{$table};
    # NOTE(review): this hash key was garbled in the source; `filename` is
    # implied by the filepath sprintf below and by ImportUtils::TMP_FILE usage
    # in run() — confirm against upstream
    $hash->{filename} = sprintf('%s_%s.txt', $stable_id, $table);
    $hash->{filepath} = sprintf('%s/%s', $tmpdir, $hash->{filename});

    my $fh = FileHandle->new();
    $fh->open(">" . $hash->{filepath}) or die("ERROR: Could not write to " . $hash->{filepath});
    $hash->{fh} = $fh;
  }

  return $files;
}
346

347

348
1;
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc