• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Ensembl / ensembl-variation / #608922749

30 Aug 2023 04:16PM UTC coverage: 83.169%. First build
#608922749

Pull #1029

travis-ci

Pull Request #1029: Fetch multiple SV by name list

30 of 30 new or added lines in 2 files covered. (100.0%)

22453 of 26997 relevant lines covered (83.17%)

1436.3 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

1.63
/scripts/import/ImportUtils.pm
1
=head1 LICENSE
2

3
Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4
Copyright [2016-2025] EMBL-European Bioinformatics Institute
5

6
Licensed under the Apache License, Version 2.0 (the "License");
7
you may not use this file except in compliance with the License.
8
You may obtain a copy of the License at
9

10
     http://www.apache.org/licenses/LICENSE-2.0
11

12
Unless required by applicable law or agreed to in writing, software
13
distributed under the License is distributed on an "AS IS" BASIS,
14
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
See the License for the specific language governing permissions and
16
limitations under the License.
17

18
=cut
19

20
use strict;
1✔
21

22

23
=head1 CONTACT
24

25
  Please email comments or questions to the public Ensembl
26
  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
27

28
  Questions may also be sent to the Ensembl help desk at
29
  <http://www.ensembl.org/Help/Contact>.
30

31
=cut
32

33
use warnings;
1✔
34

35
package ImportUtils;
36

37
use Exporter;
1✔
38

39
our @ISA = ('Exporter');
40

41
our @EXPORT_OK = qw(dumpSQL debug create create_and_load load loadfile get_create_statement make_xml_compliant update_table);
42

43
our $TMP_DIR = "/tmp";
44
our $TMP_FILE = 'tabledump.txt';
45

46
# This will strip non-xml-compliant characters from an infile, saving a backup in {infile name}.bak
47
# If no infile was specified, will use the tempfile but no backup will be kept. If no xml version was specified, will default to 1.1
48
# A replacement character or string can be passed
49
# If the xml version was not recognized, will do nothing.
50
sub make_xml_compliant {
51
  my $infile = shift;
×
52
  my $version = shift;
×
53
  my $replacement = shift;
×
54
  
55
  my $keep_backup = defined($infile);
×
56
  
57
  $infile ||= $TMP_DIR . "/" . $TMP_FILE;
×
58
  $version ||= "1.1";
×
59
  $replacement ||= "";
×
60
  
61
  my @ARGV_bak = @ARGV;
×
62
  @ARGV = ($infile);
×
63
  $^I = ".bak";
×
64
  while (<>) {
×
65
    if ($version =~ m/1\.1/) {
×
66
      s/[^\x01-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/$replacement/go;
×
67
      s/[\x01-\x08\x0B-\x0C\x0E-\x1F\x7F-\x84\x86-\x9F]/$replacement/go;
×
68
    }
69
    elsif ($version =~ m/1\.0/) {
70
      s/[^\x09\x0A\x0D\x20-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/$replacement/go;
×
71
    }
72
    print;
×
73
  }
74
  # Remove the backup if an input file wasn't supplied
75
  unlink($infile . ".bak") if (!$keep_backup);
×
76
  # Restore the @ARGV variable
77
  @ARGV = @ARGV_bak;
×
78
}
79

80
# successive dumping and loading of tables is typical for this process
81
# dump does effectively a select into outfile without server file system access
82
sub dumpSQL {
83
  my $db  = shift;
×
84
  my $sql = shift;
×
85
  my $dbe = shift;  ## handle postgreSQL differently
×
86

87
  if($dbe =~/pg|postgreSQL/i ){
×
88
    dumpSQL_PG($db, $sql);
×
89
  }
90
  else{
91
   my $sth = $db->prepare( $sql );
×
92
   dumpPreparedSQL($sth);
×
93
   $sth->finish();
×
94
  }
95
 
96
}
97

98
sub dumpPreparedSQL {
99
  my $sth = shift;
×
100
  
101
  local *FH;
×
102
  my $counter = 0;
×
103
  open( FH, ">$TMP_DIR/$TMP_FILE" )
×
104
      or die( "Cannot open $TMP_DIR/$TMP_FILE: $!" );
105
      
106
  $sth->{mysql_use_result} = 1;
×
107
  $sth->execute();
×
108
  my $first;
×
109
  while ( my $aref = $sth->fetchrow_arrayref() ) {
×
110
    my @a = map {defined($_) ? $_ : '\N'} @$aref;
×
111
    print FH join("\t", @a), "\n";
×
112
  }
113

114
  close FH;
×
115
}
116

117
## use postgresql cursors to avoid memory issues on large exports
118
sub dumpSQL_PG {
119
 my $dbh = shift; 
×
120
 my $sql = shift;
×
121

122
  local *FH;
×
123
  my $counter = 0;
×
124
  open( FH, ">$TMP_DIR/$TMP_FILE" )
×
125
      or die( "Cannot open $TMP_DIR/$TMP_FILE: $!" );
126

127
   $dbh->db_handle->begin_work();
×
128
   $dbh->do("DECLARE csr CURSOR  FOR $sql");
×
129
   while (1) {
×
130
     my $sth = $dbh->prepare("fetch 100000 from csr");
×
131
     $sth->execute;
×
132
     last if 0 == $sth->rows;
×
133
   
134
     while ( my $aref = $sth->fetchrow_arrayref() ) {
×
135
       my @a = map {defined($_) ? $_ : '\N'} @$aref;
×
136
       print FH join("\t", @a), "\n";
×
137
     }
138
  }
139
   $dbh->do("CLOSE csr");
×
140
   $dbh->db_handle->rollback();
×
141
   close FH;
×
142
}
143

144

145

146
# load imports a table, optionally not all columns
147
# if table doesnt exist, create a varchar(255) for each column
148
sub load {
149
  loadfile("$TMP_DIR/$TMP_FILE",@_);
×
150
}
151

152
sub loadfile {
153
  my $loadfile = shift;
×
154
  my $db = shift;
×
155
  my $tablename = shift;
×
156
  my @colnames = @_;
×
157
  
158
  my $cols = join( ",", @colnames );
×
159

160
  my $table_file = "$TMP_DIR/$tablename\_$$\.txt";
×
161

162
  if (! -e $loadfile) {
×
163
    die("File to load ($loadfile) does not exist");
×
164
  }
165

166
  # Do not rename the $loadfile to the $table_file
167
  # if the $table_file already exists
168
  if (-e $table_file) {
×
169
    die("File to rename to for table load exists ($table_file)");
×
170
   }
171

172
  my $ret = rename($loadfile, $table_file);
×
173
  if (! $ret) {
×
174
    die("rename of ($loadfile) to ($table_file) for table load fails");
×
175
  }
176

177
#  my $host = $db->host();
178
#  my $user = $db->user();
179
#  my $pass = $db->pass();
180
#  my $port = $db->port();
181
#  my $dbname = $db->dbname();
182

183
#  my $call = "mysqlimport -c $cols -h $host -u $user " .
184
#    "-p$pass -P$port $dbname $table_file";
185

186
#  system($call);
187

188
#  unlink("$TMP_DIR/$tablename.txt");
189

190

191
##### Alternative way of doing same thing
192
  my $sql;
×
193

194
  #need to find out if possible use the LOCAL option
195

196
#  my $local_option = 'LOCAL'; #by default, use the LOCAL option
197
#  if( `hostname` =~ /^bc/ ){ # No LOCAL on bcs nodes
198
#    $local_option = '';
199
#  }
200
#  elsif( ! -e $table_file ){ # File is not on local filesystem
201
#    $local_option = '';
202
#  }
203

204
  my $local_option = 'LOCAL';
×
205

206
 # my $host = `hostname`;
207
 # chop $host;
208
 #  $host =~ /(ecs\d+)/; #get the machine, only use LOCAL in ecs machines (ecs2, ecs4)
209
 #  my $local_option = '';
210
 #  #the script is running in ecs machine, let's find out if the file is in the same machine, too
211
 #  if ($1){
212
 #      if ($table_file =~ /$1/){
213
 #          $local_option = 'LOCAL';
214
 #      }
215
 #  }
216

217
  
218
   if ( @colnames ) {
×
219

220
     $sql = qq{
×
221
               LOAD DATA $local_option INFILE "$table_file"
222
               INTO TABLE $tablename( $cols )
223
              };
224
   } else {
225
     $sql = qq{
×
226
               LOAD DATA $local_option INFILE "$table_file"
227
               INTO TABLE $tablename
228
              };
229
   }
230
   $db->do( $sql );
×
231

232
   unlink( "$table_file" );
×
233
}
234

235

236
#
237
# creates a table with specified columns
238
#
239
# by default all columns are VARCHAR(255), but an 'i' may be added after the
240
# column name to make it an INT.  Additionally a '*' means add an index to
241
# the column.
242
#
243
# e.g.  create($db, 'mytable', 'col0', 'col1 *', 'col2 i', 'col3 i*');
244
#
245

246
sub create {
247
  my $db = shift;
×
248
  my $tablename = shift;
×
249
  my @cols = @_;
×
250

251
  my $sql = "CREATE TABLE $tablename ( ";
×
252

253
  my @col_defs;
×
254
  my @idx_defs;
×
255
  my @col_names;
×
256

257
  foreach my $col (@cols) {
×
258
    my ($name, $type, $nullable, $unsigned) = split(/\s+/,$col);
×
259
    push @col_names, $name;
×
260

261
    my $null ="";
×
262
    if (defined($nullable) && $nullable =~/not_null/){$null =" NOT NULL";}
×
263

264
    if(defined($type) && $type =~ /i/ && defined $unsigned ) {
×
265
      push @col_defs, "$name INT unsigned $null";
×
266
    }
267
    elsif(defined($type) && $type =~ /i/) {
268
      push @col_defs, "$name INT  $null";
×
269
    }
270
    elsif (defined($type) && $type =~ /f/) {
271
      push @col_defs, "$name FLOAT $null";
×
272
    }
273
    elsif (defined($type) && $type =~ /l/) {
274
      push @col_defs, "$name TEXT $null";
×
275
    } 
276
    elsif (defined($type) && $type =~ /d/) {
277
       push @col_defs, "$name DOUBLE $null";
×
278
    }
279
    elsif (defined($type) && $type =~ /v/) {
280
      $type =~ s/v//;
×
281
      my $len = 255;
×
282
       push @col_defs, "$name VARCHAR($len) $null";
×
283
    }
284
    else {
285
      push @col_defs, "$name VARCHAR(255) $null";
×
286
    }
287

288
    if(defined($type) && $type =~ /\*/) {
×
289
      push @idx_defs, "KEY ${name}_idx($name)";
×
290
    }
291
  }
292

293
  my $create_cols = join( ",\n", @col_defs, @idx_defs);
×
294

295

296
  $sql .= $create_cols.")";
×
297

298
  $sql .= " MAX_ROWS = 100000000" if ($tablename =~ /^tmp.*gty$/); #need to make bigger this table for human
×
299

300
  $sql .= " ENGINE = 'MyISAM' "; ##may not be default engine
×
301

302
  $db->do( $sql );
×
303

304
  return @col_names;
×
305
}
306

307
#
308
# creates a table with specified columns and loads data that was dumped
309
# to a tmp file into the table.
310
#
311
# by default all columns are VARCHAR(255), but an 'i' may be added after the
312
# column name to make it an INT.  Additionally a '*' means add an index to
313
# the column.
314
#
315
# e.g.  create_and_load($db, 'mytable', 'col0', 'col1 *', 'col2 i', 'col3 i*');
316
#
317

318
sub create_and_load {
319
  my $db = shift;
×
320
  my $tablename = shift;
×
321
  my @cols = @_;
×
322

323
  my @col_names = create($db,$tablename,@cols);
×
324

325
  load( $db, $tablename, @col_names );
×
326
}
327

328
#
329
#ÊGets the create statement to create the desired table from the master_schema_variation database
330
#
331
sub get_create_statement_from_db {
332
  my $dbc = shift;
×
333
  my $table = shift;
×
334
  my $db_name = shift;
×
335

336
  if (defined($db_name)) {
×
337
    $table = $db_name . '.' . $table;
×
338
  }
339

340
  my $stmt = qq{
×
341
    SHOW CREATE TABLE
342
      $table
343
  };
344
  my $result = $dbc->db_handle->selectall_arrayref($stmt)->[0][1];
×
345
  
346
  return $result;
×
347
}
348

349
#ÊGet the create statement for a table from e.g. the table.sql schema definition file
350
sub get_create_statement {
351
  my $table = shift;
×
352
  my $sql_file = shift;
×
353
  
354
  # Parse the file
355
  open(FH,'<',$sql_file) or die("Could not open $sql_file for reading");
×
356
  
357
  # Parse the file into a string
358
  my $contents = "";
×
359
  while (<FH>) {
×
360
    chomp;
×
361
    $contents .= "$_ ";
×
362
  }
363
  close(FH);
×
364
  
365
  # Grab the correct create statement
366
  my ($stmt) = $contents =~ m/(create table $table [^\;]+)\;/i;
×
367
  
368
  if (!$stmt) {
×
369
    warn("Could not find CREATE TABLE statement for $table");
×
370
  }
371
  
372
  return $stmt;
×
373
}
374

375
sub update_table {
376
  my $db = shift;
×
377
  my $source_table = shift;
×
378
  my $target_table = shift;
×
379
  my $source_key = shift;
×
380
  my $target_key = shift;
×
381
  my $source_col = shift;
×
382
  my $target_col = shift;
×
383
  my $clean = shift;
×
384
  
385
  die("Incorrect arguments supplied to update_table\n") unless
×
386
    defined($db) &&
387
    defined($source_table) &&
388
    defined($target_table) &&
389
    defined($source_key) &&
390
    defined($target_key) &&
391
    defined($source_col) &&
392
    defined($target_col);
393
  
394
  my ($stmt, $sth);
×
395
  
396
  # get columns of source table
397
  $stmt = qq{
×
398
    DESCRIBE $source_table
399
  };
400
  $sth = $db->prepare($stmt);
×
401
  $sth->execute;
×
402
  
403
  my @source_cols = map {$_->[0]} @{$sth->fetchall_arrayref};
×
404
  $sth->finish;
×
405
  
406
  die("No columns found in table $source_table\n") unless @source_cols;
×
407
  die("Key column $source_key not found in source table $source_table\n") unless grep {$_ eq $source_key} @source_cols;
×
408
  die("Data column $source_col not found in source table $source_table\n") unless grep {$_ eq $source_col} @source_cols;
×
409
  
410
  # get columns of target table
411
  $stmt = qq{
×
412
    DESCRIBE $target_table
413
  };
414
  $sth = $db->prepare($stmt);
×
415
  $sth->execute;
×
416
  
417
  my @target_cols = map {$_->[0]} @{$sth->fetchall_arrayref};
×
418
  $sth->finish;
×
419
  
420
  die("No columns found in table $target_table\n") unless @target_cols;
×
421
  die("Key column $target_key not found in target table $target_table\n") unless grep {$_ eq $target_key} @target_cols;
×
422
  die("Data column $target_col not found in target table $target_table\n") unless grep {$_ eq $target_col} @target_cols;
×
423
  
424
  # construct columns to select
425
  my (@select_cols, $select_cols);
×
426
  
427
  foreach my $col(@target_cols) {
×
428
    if($col eq $target_col) {
×
429
      if($clean) {
×
430
        push @select_cols, $source_table.'.'.$source_col;
×
431
      }
432
      else {
433
        push @select_cols, qq{if($source_table.$source_col is null, $target_table.$col, $source_table.$source_col)};
×
434
      }
435
    }
436
    else {
437
      push @select_cols, $target_table.'.'.$col;
×
438
    }
439
  }
440
  
441
  $select_cols = join ",", @select_cols;
×
442
  
443
  # create tmp table
444
  my $tmp_table = $target_table.'_tmp_'.$$;
×
445
  
446
  $stmt = qq{
×
447
    CREATE TABLE $tmp_table LIKE $target_table
448
  };
449
  $db->do($stmt);
×
450
  
451
  # construct SQL
452
  $stmt = qq{
×
453
    INSERT INTO $tmp_table
454
    SELECT $select_cols
455
    FROM $target_table
456
    LEFT JOIN $source_table
457
    ON $target_table.$target_key = $source_table.$source_key
458
  };
459
  
460
  $db->do($stmt);
×
461
  
462
  # check things went OK
463
  $sth = $db->prepare("SELECT COUNT(*) FROM $target_table");
×
464
  $sth->execute();
×
465
  my $old_count = $sth->fetchall_arrayref()->[0]->[0];
×
466
  $sth->finish();
×
467
  
468
  $sth = $db->prepare("SELECT COUNT(*) FROM $tmp_table");
×
469
  $sth->execute();
×
470
  my $new_count = $sth->fetchall_arrayref()->[0]->[0];
×
471
  $sth->finish();
×
472
  
473
  die("New tmp table ($new_count) does not have the same number of rows as original target table ($old_count)\n") unless $old_count == $new_count;
×
474
  
475
  # rename and drop
476
  $db->do(qq{DROP TABLE $target_table});
×
477
  $db->do(qq{RENAME TABLE $tmp_table TO $target_table});
×
478
}
479

480
sub debug {
481
  print STDERR @_, "\n";
×
482
}
483

484

485

486
1;
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc