• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #45

20 Sep 2024 06:49PM UTC coverage: 26.244% (-49.2%) from 75.443%
#45

push

travis-ci

web-flow
attempt to fix coveralls run

2458 of 9366 relevant lines covered (26.24%)

4362.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main_benchmark.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2024 by Mikkel Schubert - mikkelsch@gmail.com           *
5
 *                                                                       *
6
 * This program is free software: you can redistribute it and/or modify  *
7
 * it under the terms of the GNU General Public License as published by  *
8
 * the Free Software Foundation, either version 3 of the License, or     *
9
 * (at your option) any later version.                                   *
10
 *                                                                       *
11
 * This program is distributed in the hope that it will be useful,       *
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14
 * GNU General Public License for more details.                          *
15
 *                                                                       *
16
 * You should have received a copy of the GNU General Public License     *
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
18
\*************************************************************************/
19
#include "alignment.hpp"         // for alignment_info, sequence_aligner
20
#include "benchmarking.hpp"      // for benchmarker
21
#include "fastq.hpp"             // for fastq
22
#include "linereader_joined.hpp" // for joined_line_readers
23
#include "logging.hpp"           // for log
24
#include "sequence_sets.hpp"     // for adapter_set
25
#include "statistics.hpp"        // for fastq_statistics
26
#include "strutils.hpp"          // for to_lower
27
#include "userconfig.hpp"        // for userconfig
28
#include <cstdint>               // for size_t
29
#include <vector>                // for vector
30

31
namespace adapterremoval {
32

33
namespace {
34

35
#ifdef __clang__
36
#define NO_OPTIMIZE_CLANG __attribute__((optnone))
37
#define NO_OPTIMIZE_GCC
38
#else
39
#define NO_OPTIMIZE_CLANG
40
#define NO_OPTIMIZE_GCC __attribute__((optimize("O0")))
41
#endif
42

43
/** Unoptimized to prevent calculations from being elided by the compiler */
44
template<typename T>
45
void NO_OPTIMIZE_GCC
46
blackbox(T& /* unused */) NO_OPTIMIZE_CLANG
×
47
{
48
}
49

×
50
class readlines_benchmarker : public benchmarker
51
{
52
public:
×
53
  readlines_benchmarker(string_vec filenames_1,
54
                        string_vec filenames_2,
55
                        const size_t head)
×
56
    : benchmarker("FASTQ reading", { "read" })
57
    , m_filenames_1(std::move(filenames_1))
58
    , m_filenames_2(std::move(filenames_2))
×
59
    , m_head(head)
60
  {
61
    set_required();
×
62
  }
63

64
  const string_vec& lines_1() const { return m_lines_1; }
65

66
  const string_vec& lines_2() const { return m_lines_2; }
67

68
protected:
×
69
  void setup() override
70
  {
71
    m_lines_1 = string_vec();
×
72
    m_lines_2 = string_vec();
×
73
  }
×
74

×
75
  void execute() override
76
  {
×
77
    read_lines(m_filenames_1, m_lines_1);
78
    read_lines(m_filenames_2, m_lines_2);
79

×
80
    blackbox(m_lines_1);
81
    blackbox(m_lines_2);
×
82
  }
83

84
private:
×
85
  void read_lines(const string_vec& filenames, string_vec& lines) const
86
  {
×
87
    joined_line_readers reader(filenames);
×
88
    while (lines.size() / 4 < m_head) {
89
      lines.emplace_back(std::string());
90
      if (!reader.getline(lines.back())) {
×
91
        lines.pop_back();
92
        break;
×
93
      }
×
94
    }
95
  }
×
96

×
97
  string_vec m_filenames_1{};
98
  string_vec m_filenames_2{};
99
  size_t m_head = 0;
100
  // Vector containing files set of lines read
×
101
  string_vec m_lines_1{};
102
  string_vec m_lines_2{};
×
103
};
×
104

×
105
/** Benchmarking of FASTQ parsing excluding file IO */
×
106
class fastq_parser_benchmarker : public benchmarker
×
107
{
×
108
public:
109
  fastq_parser_benchmarker(const string_vec& lines_1, const string_vec& lines_2)
110
    : benchmarker("FASTQ parsing", { "parse" })
111
    , m_lines_1(lines_1)
112
    , m_lines_2(lines_2)
113
  {
114
    set_required();
115
  }
116

117
  const fastq_vec& records_1() const { return m_records_1; }
118

119
  const fastq_vec& records_2() const { return m_records_2; }
120

121
protected:
122
  void setup() override
123
  {
124
    m_records_1 = fastq_vec();
×
125
    m_records_2 = fastq_vec();
×
126
  }
×
127

×
128
  void execute() override
129
  {
×
130
    fastq record;
131
    {
132
      vec_reader reader_1(m_lines_1);
×
133
      while (record.read(reader_1, FASTQ_ENCODING_33)) {
134
        m_records_1.push_back(record);
×
135
      }
136
    }
137

×
138
    {
139
      vec_reader reader_2(m_lines_2);
×
140
      while (record.read(reader_2, FASTQ_ENCODING_33)) {
×
141
        m_records_2.push_back(record);
142
      }
143
    }
×
144

145
    blackbox(m_records_1);
×
146
    blackbox(m_records_2);
×
147
  }
×
148

×
149
private:
×
150
  const string_vec& m_lines_1;
151
  const string_vec& m_lines_2;
152
  fastq_vec m_records_1{};
153
  fastq_vec m_records_2{};
×
154
};
×
155

×
156
class reverse_complement_benchmarker : public benchmarker
×
157
{
158
public:
159
  reverse_complement_benchmarker(fastq_vec records_1, fastq_vec records_2)
160
    : benchmarker("reverse complement", { "revcompl" })
×
161
    , m_records_1(std::move(records_1))
×
162
    , m_records_2(std::move(records_2))
163
  {
164
  }
165

166
protected:
167
  void execute() override
168
  {
169
    for (auto& it : m_records_1) {
170
      it.reverse_complement();
171
    }
172

173
    for (auto& it : m_records_2) {
174
      it.reverse_complement();
×
175
    }
×
176
  }
×
177

×
178
private:
179
  fastq_vec m_records_1{};
180
  fastq_vec m_records_2{};
181
};
182

×
183
class complexity_benchmarker : public benchmarker
184
{
×
185
public:
×
186
  complexity_benchmarker(const fastq_vec& records_1, const fastq_vec& records_2)
187
    : benchmarker("read complexity", { "complexity" })
188
    , m_records_1(records_1)
×
189
    , m_records_2(records_2)
×
190
  {
191
  }
192

193
protected:
194
  void execute() override
195
  {
196
    double total = 0.0;
197
    total += complexity(m_records_1);
198
    total += complexity(m_records_2);
×
199

200
    blackbox(total);
201
  }
×
202

×
203
private:
×
204
  static double complexity(const fastq_vec& records)
×
205
  {
206
    double total = 0.0;
207
    for (const auto& it : records) {
208
      total += it.complexity();
209
    }
×
210

211
    return total;
×
212
  }
×
213

×
214
  const fastq_vec& m_records_1;
215
  const fastq_vec& m_records_2;
×
216
};
217

218
class trimming_benchmarker : public benchmarker
219
{
220
public:
221
  trimming_benchmarker(const std::string& desc,
×
222
                       const std::string& toggle,
×
223
                       const fastq_vec& records_1,
×
224
                       const fastq_vec& records_2)
225
    : benchmarker(desc, { "trim", "trim:" + toggle })
226
    , m_records_1(records_1)
×
227
    , m_records_2(records_2)
228
  {
229
  }
230

231
protected:
232
  void setup() override
233
  {
234
    m_trimmed_records_1 = m_records_1;
235
    m_trimmed_records_2 = m_records_2;
236
  }
×
237

238
  void execute() override
239
  {
240
    trim(m_trimmed_records_1);
×
241
    trim(m_trimmed_records_2);
×
242

×
243
    blackbox(m_trimmed_records_1);
244
    blackbox(m_trimmed_records_2);
245
  }
246

247
  virtual void trim(fastq_vec& reads) const = 0;
×
248

249
private:
×
250
  const fastq_vec& m_records_1;
×
251
  const fastq_vec& m_records_2;
252
  fastq_vec m_trimmed_records_1{};
253
  fastq_vec m_trimmed_records_2{};
×
254
};
255

×
256
class basic_trimming_benchmarker : public trimming_benchmarker
×
257
{
258
public:
×
259
  basic_trimming_benchmarker(const fastq_vec& records_1,
×
260
                             const fastq_vec& records_2)
261
    : trimming_benchmarker("basic trimming", "basic", records_1, records_2)
262
  {
263
  }
264

265
protected:
266
  void trim(fastq_vec& reads) const override
267
  {
268
    for (auto& read : reads) {
269
      read.trim_trailing_bases(true, 2);
270
    }
271
  }
×
272
};
273

274
class mott_trimming_benchmarker : public trimming_benchmarker
×
275
{
276
public:
×
277
  mott_trimming_benchmarker(const fastq_vec& records_1,
278
                            const fastq_vec& records_2)
279
    : trimming_benchmarker("mott trimming", "mott", records_1, records_2)
280
  {
281
  }
×
282

283
protected:
×
284
  void trim(fastq_vec& reads) const override
×
285
  {
286
    for (auto& read : reads) {
287
      read.mott_trimming(0.05);
288
    }
289
  }
×
290
};
291

292
class window_trimming_benchmarker : public trimming_benchmarker
×
293
{
294
public:
×
295
  window_trimming_benchmarker(const fastq_vec& records_1,
296
                              const fastq_vec& records_2)
297
    : trimming_benchmarker("window trimming", "window", records_1, records_2)
298
  {
299
  }
×
300

301
protected:
×
302
  void trim(fastq_vec& reads) const override
×
303
  {
304
    for (auto& read : reads) {
305
      read.trim_windowed_bases(true, 2, 0.1);
306
    }
307
  }
×
308
};
309

310
/** Class for benchmarking collection of `fastq_statistics` */
×
311
class fastq_statistics_benchmarker : public benchmarker
312
{
×
313
public:
314
  fastq_statistics_benchmarker(const fastq_vec& records_1,
315
                               const fastq_vec& records_2)
316
    : benchmarker("read statistics", { "stats" })
317
    , m_records_1(records_1)
×
318
    , m_records_2(records_2)
319
  {
×
320
  }
×
321

322
  void execute() override
323
  {
324
    collect_statistics(m_records_1);
325
    collect_statistics(m_records_2);
326
  }
×
327

328
private:
329
  static void collect_statistics(const fastq_vec& records)
×
330
  {
331
    fastq_statistics stats;
×
332
    for (const auto& it : records) {
×
333
      stats.process(it);
×
334
    }
335

336
    blackbox(stats);
337
  }
×
338

339
  const fastq_vec& m_records_1;
×
340
  const fastq_vec& m_records_2;
×
341
};
342

343
/** Base-class for benchmarking SE/PE alignments */
344
class alignment_benchmarker : public benchmarker
345
{
346
public:
×
347
  alignment_benchmarker(const std::string& key, const simd::instruction_set is)
×
348
    : benchmarker(to_upper(key) + " alignment (" + simd::name(is) + ")", {})
×
349
    , m_key(key)
350
    , m_is(is)
351
  {
×
352
  }
353

354
  strategy enabled(const benchmark_toggles& toggles) const override
355
  {
356
    if (toggles.defaults() || toggles.is_set("align") ||
357
        toggles.is_set("align:" + m_key)) {
358

359
      if (toggles.is_set("simd") ||
360
          toggles.is_set("simd:" + to_lower(simd::name(m_is)))) {
361
        return strategy::benchmark;
362
      }
×
363

×
364
      // Benchmark the preferred algorithm if no algorithms were specified
×
365
      const auto supported = simd::supported();
×
366
      if (!supported.empty() && supported.back() == m_is) {
367
        for (const auto is : supported) {
368
          if (toggles.is_set("simd:" + to_lower(simd::name(is)))) {
369
            return strategy::skip;
×
370
          }
371
        }
×
372

×
373
        return strategy::benchmark;
374
      }
×
375
    }
×
376

×
377
    return strategy::skip;
378
  }
379

380
private:
×
381
  const std::string m_key;
×
382
  const simd::instruction_set m_is;
×
383
};
×
384

×
385
/** Benchmarking of SE alignments */
386
class benchmarker_se_alignment : public alignment_benchmarker
387

388
{
×
389
public:
390
  benchmarker_se_alignment(const userconfig& config,
391
                           const fastq_vec& reads,
392
                           const simd::instruction_set is)
393
    : alignment_benchmarker("se", is)
394
    , m_config(config)
395
    , m_reads(reads)
396
    , m_adapters(config.samples.adapters())
397
    , m_aligner(m_adapters, is)
398
  {
399
  }
400

401
protected:
402
  void execute() override
403
  {
404
    alignment_info best;
405

×
406
    for (const auto& read : m_reads) {
407
      const alignment_info alignment =
408
        m_aligner.align_single_end(read, m_config.shift);
×
409

×
410
      if (alignment.score() > best.score()) {
×
411
        best = alignment;
×
412
      }
×
413
    }
414

415
    blackbox(best);
416
  }
417

×
418
private:
419
  const userconfig& m_config;
×
420
  const fastq_vec& m_reads;
421
  const adapter_set m_adapters;
×
422
  sequence_aligner m_aligner;
×
423
};
×
424

425
/** Benchmarking of PE alignments */
×
426
class pe_alignment_benchmarker : public alignment_benchmarker
×
427
{
428
public:
429
  pe_alignment_benchmarker(const userconfig& config,
430
                           const fastq_vec& mate_1,
×
431
                           const fastq_vec& mate_2,
432
                           const simd::instruction_set is)
433
    : alignment_benchmarker("pe", is)
434
    , m_config(config)
435
    , m_mate_1(mate_1)
436
    , m_mate_2(mate_2)
437
    , m_adapters(config.samples.adapters())
438
    , m_aligner(m_adapters, is)
439
  {
440
  }
441

442
protected:
443
  void setup() override
444
  {
×
445
    if (m_mate_2_reversed.empty()) {
446
      m_mate_2_reversed = m_mate_2;
447

448
      // Done as part of the alignment loop but is benchmarked separately
×
449
      for (auto& it : m_mate_2_reversed) {
×
450
        it.reverse_complement();
×
451
      }
×
452
    }
×
453
  }
×
454

455
  void execute() override
456
  {
457
    AR_REQUIRE(m_mate_1.size() == m_mate_2_reversed.size());
458

×
459
    alignment_info best;
460

×
461
    auto it_1 = m_mate_1.begin();
×
462
    auto it_2 = m_mate_2_reversed.begin();
463
    while (it_1 != m_mate_1.end()) {
464
      const fastq& read_1 = *it_1++;
×
465
      const fastq& read_2 = *it_2++;
×
466

467
      const alignment_info alignment =
468
        m_aligner.align_paired_end(read_1, read_2, m_config.shift);
469

470
      if (alignment.score() > best.score()) {
×
471
        best = alignment;
472
      }
×
473
    }
474

×
475
    blackbox(best);
476
  }
×
477

×
478
private:
×
479
  const userconfig& m_config;
×
480
  const fastq_vec& m_mate_1;
×
481
  const fastq_vec& m_mate_2;
482
  fastq_vec m_mate_2_reversed{};
×
483
  adapter_set m_adapters;
×
484
  sequence_aligner m_aligner;
485
};
×
486

×
487
string_vec
488
supported_toggles()
489
{
490
  string_vec toggles = { "read",  "parse",      "revcompl",  "complexity",
×
491
                         "trim",  "trim:basic", "trim:mott", "trim:window",
492
                         "stats", "align",      "align:se",  "align:pe",
493
                         "simd" };
494

495
  for (const auto is : simd::supported()) {
496
    toggles.push_back(std::string("simd:") + to_lower(simd::name(is)));
497
  }
498

499
  return toggles;
500
}
501

502
} // namespace
503

×
504
int
505
benchmark(const userconfig& config)
×
506
{
507
  auto head = config.head;
508
  // Limit benchmarking to a reasonable data-set size by default
×
509
  if (config.head == std::numeric_limits<uint64_t>::max()) {
510
    log::warn() << "Defaulting to reading at most 1,000,000 reads/mate pairs";
×
511
    head = 1000000;
×
512
  }
513

514
  // Parse user-specified benchmarking toggles
×
515
  benchmark_toggles toggles(supported_toggles());
×
516
  if (!toggles.update_toggles(config.benchmarks)) {
517
    return 1;
518
  }
519

520
  readlines_benchmarker lines{ config.input_files_1,
×
521
                               config.input_files_2,
522
                               head };
×
523
  lines.run_if_toggled(toggles);
524

×
525
  fastq_parser_benchmarker records{ lines.lines_1(), lines.lines_2() };
×
526
  records.run_if_toggled(toggles);
×
527

528
  reverse_complement_benchmarker(records.records_1(), records.records_2())
529
    .run_if_toggled(toggles);
530

×
531
  complexity_benchmarker(records.records_1(), records.records_2())
×
532
    .run_if_toggled(toggles);
533

534
  basic_trimming_benchmarker(records.records_1(), records.records_2())
535
    .run_if_toggled(toggles);
×
536

×
537
  mott_trimming_benchmarker(records.records_1(), records.records_2())
×
538
    .run_if_toggled(toggles);
×
539

540
  window_trimming_benchmarker(records.records_1(), records.records_2())
×
541
    .run_if_toggled(toggles);
×
542

543
  fastq_statistics_benchmarker(records.records_1(), records.records_2())
×
544
    .run_if_toggled(toggles);
×
545

546
  for (const auto is : simd::supported()) {
×
547
    benchmarker_se_alignment(config, records.records_1(), is)
×
548
      .run_if_toggled(toggles);
549
  }
×
550

×
551
  if (config.paired_ended_mode) {
552
    for (const auto is : simd::supported()) {
×
553
      pe_alignment_benchmarker(
×
554
        config, records.records_1(), records.records_2(), is)
555
        .run_if_toggled(toggles);
×
556
    }
×
557
  }
558

×
559
  return 0;
×
560
}
561

×
562
} // namespace adapterremoval
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc