• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #117

25 May 2025 03:01PM UTC coverage: 66.932% (-0.07%) from 67.006%
#117

push

travis-ci

web-flow
iwyu and reduce build-time inter-dependencies (#144)

26 of 145 new or added lines in 20 files covered. (17.93%)

89 existing lines in 5 files now uncovered.

9738 of 14549 relevant lines covered (66.93%)

3041.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main_benchmark.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2024 Mikkel Schubert <mikkelsch@gmail.com>
3
#include "alignment.hpp"         // for alignment_info, sequence_aligner
4
#include "benchmarking.hpp"      // for benchmarker
5
#include "debug.hpp"             // for AR_REQUIRE
6
#include "fastq.hpp"             // for fastq
7
#include "fastq_enc.hpp"         // for FASTQ_ENCODING_33
8
#include "linereader.hpp"        // for vec_reader
9
#include "linereader_joined.hpp" // for joined_line_readers
10
#include "logging.hpp"           // for log
11
#include "sequence_sets.hpp"     // for adapter_set
12
#include "simd.hpp"              // for name, supported, instruction_set (p...
13
#include "statistics.hpp"        // for fastq_statistics
14
#include "strutils.hpp"          // for to_lower
15
#include "userconfig.hpp"        // for userconfig
16
#include <cstddef>               // for size_t
17
#include <cstdint>               // for uint64_t
18
#include <limits>                // for numeric_limits
19
#include <memory>                // for unique_ptr
20
#include <string>                // for string+, char_traits
21
#include <utility>               // for move
22
#include <vector>                // for vector
23

24
namespace adapterremoval {
25

26
namespace {
27

28
#ifdef __clang__
29
#define NO_OPTIMIZE_CLANG __attribute__((optnone))
30
#define NO_OPTIMIZE_GCC
31
#else
32
#define NO_OPTIMIZE_CLANG
33
#define NO_OPTIMIZE_GCC __attribute__((optimize("O0")))
34
#endif
35

36
/** Unoptimized to prevent calculations from being elided by the compiler */
37
template<typename T>
38
void NO_OPTIMIZE_GCC
39
blackbox(T& /* unused */) NO_OPTIMIZE_CLANG
×
40
{
41
}
42

×
43
class readlines_benchmarker : public benchmarker
44
{
45
public:
×
46
  readlines_benchmarker(string_vec filenames_1,
47
                        string_vec filenames_2,
48
                        const size_t head)
×
49
    : benchmarker("FASTQ reading", { "read" })
50
    , m_filenames_1(std::move(filenames_1))
51
    , m_filenames_2(std::move(filenames_2))
×
52
    , m_head(head)
53
  {
54
    set_required();
×
55
  }
56

57
  const string_vec& lines_1() const { return m_lines_1; }
58

59
  const string_vec& lines_2() const { return m_lines_2; }
60

61
protected:
×
62
  void setup() override
63
  {
64
    m_lines_1 = string_vec();
×
65
    m_lines_2 = string_vec();
×
66
  }
×
67

×
68
  void execute() override
69
  {
×
70
    read_lines(m_filenames_1, m_lines_1);
71
    read_lines(m_filenames_2, m_lines_2);
72

×
73
    blackbox(m_lines_1);
74
    blackbox(m_lines_2);
×
75
  }
76

77
private:
×
78
  void read_lines(const string_vec& filenames, string_vec& lines) const
79
  {
×
80
    joined_line_readers reader(filenames);
×
81
    while (lines.size() / 4 < m_head) {
82
      lines.emplace_back(std::string());
83
      if (!reader.getline(lines.back())) {
×
84
        lines.pop_back();
85
        break;
×
86
      }
×
87
    }
88
  }
×
89

×
90
  string_vec m_filenames_1{};
91
  string_vec m_filenames_2{};
92
  size_t m_head = 0;
93
  // Vector containing files set of lines read
×
94
  string_vec m_lines_1{};
95
  string_vec m_lines_2{};
×
96
};
×
97

×
98
/** Benchmarking of FASTQ parsing excluding file IO */
×
99
class fastq_parser_benchmarker : public benchmarker
×
100
{
×
101
public:
102
  fastq_parser_benchmarker(const string_vec& lines_1, const string_vec& lines_2)
103
    : benchmarker("FASTQ parsing", { "parse" })
104
    , m_lines_1(lines_1)
105
    , m_lines_2(lines_2)
106
  {
107
    set_required();
108
  }
109

110
  const std::vector<fastq>& records_1() const { return m_records_1; }
111

112
  const std::vector<fastq>& records_2() const { return m_records_2; }
113

114
protected:
115
  void setup() override
116
  {
NEW
117
    m_records_1 = std::vector<fastq>();
×
NEW
118
    m_records_2 = std::vector<fastq>();
×
119
  }
×
120

×
121
  void execute() override
122
  {
×
123
    fastq record;
124
    {
125
      vec_reader reader_1(m_lines_1);
×
126
      while (record.read(reader_1, FASTQ_ENCODING_33)) {
127
        m_records_1.push_back(record);
×
128
      }
129
    }
130

×
131
    {
132
      vec_reader reader_2(m_lines_2);
×
133
      while (record.read(reader_2, FASTQ_ENCODING_33)) {
×
134
        m_records_2.push_back(record);
135
      }
136
    }
×
137

138
    blackbox(m_records_1);
×
139
    blackbox(m_records_2);
×
140
  }
×
141

×
142
private:
×
143
  const string_vec& m_lines_1;
144
  const string_vec& m_lines_2;
145
  std::vector<fastq> m_records_1{};
NEW
146
  std::vector<fastq> m_records_2{};
×
147
};
×
148

×
149
class reverse_complement_benchmarker : public benchmarker
×
150
{
151
public:
152
  reverse_complement_benchmarker(std::vector<fastq> records_1,
NEW
153
                                 std::vector<fastq> records_2)
×
154
    : benchmarker("reverse complement", { "revcompl" })
×
155
    , m_records_1(std::move(records_1))
156
    , m_records_2(std::move(records_2))
157
  {
158
  }
159

160
protected:
161
  void execute() override
162
  {
163
    for (auto& it : m_records_1) {
164
      it.reverse_complement();
165
    }
166

UNCOV
167
    for (auto& it : m_records_2) {
×
168
      it.reverse_complement();
169
    }
×
170
  }
×
171

×
172
private:
173
  std::vector<fastq> m_records_1{};
174
  std::vector<fastq> m_records_2{};
175
};
176

×
177
class complexity_benchmarker : public benchmarker
178
{
×
179
public:
×
180
  complexity_benchmarker(const std::vector<fastq>& records_1,
181
                         const std::vector<fastq>& records_2)
182
    : benchmarker("read complexity", { "complexity" })
×
183
    , m_records_1(records_1)
×
184
    , m_records_2(records_2)
185
  {
186
  }
187

188
protected:
189
  void execute() override
190
  {
191
    double total = 0.0;
192
    total += complexity(m_records_1);
193
    total += complexity(m_records_2);
194

UNCOV
195
    blackbox(total);
×
196
  }
197

×
198
private:
×
NEW
199
  static double complexity(const std::vector<fastq>& records)
×
200
  {
201
    double total = 0.0;
202
    for (const auto& it : records) {
203
      total += it.complexity();
204
    }
×
205

206
    return total;
×
207
  }
×
208

×
209
  const std::vector<fastq>& m_records_1;
NEW
210
  const std::vector<fastq>& m_records_2;
×
211
};
212

213
class trimming_benchmarker : public benchmarker
214
{
215
public:
216
  trimming_benchmarker(const std::string& desc,
×
217
                       const std::string& toggle,
×
NEW
218
                       const std::vector<fastq>& records_1,
×
219
                       const std::vector<fastq>& records_2)
220
    : benchmarker(desc, { "trim", "trim:" + toggle })
221
    , m_records_1(records_1)
×
222
    , m_records_2(records_2)
223
  {
224
  }
225

226
protected:
227
  void setup() override
228
  {
229
    m_trimmed_records_1 = m_records_1;
230
    m_trimmed_records_2 = m_records_2;
231
  }
×
232

233
  void execute() override
234
  {
235
    trim(m_trimmed_records_1);
×
236
    trim(m_trimmed_records_2);
×
237

×
238
    blackbox(m_trimmed_records_1);
239
    blackbox(m_trimmed_records_2);
240
  }
241

NEW
242
  virtual void trim(std::vector<fastq>& reads) const = 0;
×
243

244
private:
×
NEW
245
  const std::vector<fastq>& m_records_1;
×
246
  const std::vector<fastq>& m_records_2;
247
  std::vector<fastq> m_trimmed_records_1{};
NEW
248
  std::vector<fastq> m_trimmed_records_2{};
×
249
};
250

×
251
class basic_trimming_benchmarker : public trimming_benchmarker
×
252
{
253
public:
×
NEW
254
  basic_trimming_benchmarker(const std::vector<fastq>& records_1,
×
255
                             const std::vector<fastq>& records_2)
256
    : trimming_benchmarker("basic trimming", "basic", records_1, records_2)
257
  {
258
  }
259

260
protected:
261
  void trim(std::vector<fastq>& reads) const override
262
  {
263
    for (auto& read : reads) {
264
      read.trim_trailing_bases(true, 2);
265
    }
266
  }
267
};
268

269
class mott_trimming_benchmarker : public trimming_benchmarker
×
270
{
271
public:
×
272
  mott_trimming_benchmarker(const std::vector<fastq>& records_1,
273
                            const std::vector<fastq>& records_2)
274
    : trimming_benchmarker("mott trimming", "mott", records_1, records_2)
275
  {
276
  }
×
277

278
protected:
×
NEW
279
  void trim(std::vector<fastq>& reads) const override
×
280
  {
281
    for (auto& read : reads) {
282
      read.mott_trimming(0.05);
283
    }
284
  }
285
};
286

287
class window_trimming_benchmarker : public trimming_benchmarker
×
288
{
289
public:
×
290
  window_trimming_benchmarker(const std::vector<fastq>& records_1,
291
                              const std::vector<fastq>& records_2)
292
    : trimming_benchmarker("window trimming", "window", records_1, records_2)
293
  {
294
  }
×
295

296
protected:
×
NEW
297
  void trim(std::vector<fastq>& reads) const override
×
298
  {
299
    for (auto& read : reads) {
300
      read.trim_windowed_bases(true, 2, 0.1);
301
    }
302
  }
303
};
304

305
/** Class for benchmarking collection of `fastq_statistics` */
×
306
class fastq_statistics_benchmarker : public benchmarker
307
{
×
308
public:
309
  fastq_statistics_benchmarker(const std::vector<fastq>& records_1,
310
                               const std::vector<fastq>& records_2)
311
    : benchmarker("read statistics", { "stats" })
312
    , m_records_1(records_1)
×
313
    , m_records_2(records_2)
314
  {
×
315
  }
×
316

317
  void execute() override
318
  {
319
    collect_statistics(m_records_1);
320
    collect_statistics(m_records_2);
321
  }
322

323
private:
NEW
324
  static void collect_statistics(const std::vector<fastq>& records)
×
325
  {
326
    fastq_statistics stats;
×
327
    for (const auto& it : records) {
×
328
      stats.process(it);
×
329
    }
330

331
    blackbox(stats);
332
  }
×
333

NEW
334
  const std::vector<fastq>& m_records_1;
×
NEW
335
  const std::vector<fastq>& m_records_2;
×
336
};
337

338
/** Base-class for benchmarking SE/PE alignments */
339
class alignment_benchmarker : public benchmarker
340
{
341
public:
×
342
  alignment_benchmarker(const std::string& key, const simd::instruction_set is)
×
343
    : benchmarker(to_upper(key) + " alignment (" + simd::name(is) + ")", {})
×
344
    , m_key(key)
345
    , m_is(is)
346
  {
×
347
  }
348

349
  strategy enabled(const benchmark_toggles& toggles) const override
350
  {
351
    if (toggles.defaults() || toggles.is_set("align") ||
352
        toggles.is_set("align:" + m_key)) {
353

354
      if (toggles.is_set("simd") ||
355
          toggles.is_set("simd:" + to_lower(simd::name(m_is)))) {
356
        return strategy::benchmark;
357
      }
×
358

×
359
      // Benchmark the preferred algorithm if no algorithms were specified
×
360
      const auto supported = simd::supported();
×
361
      if (!supported.empty() && supported.back() == m_is) {
362
        for (const auto is : supported) {
363
          if (toggles.is_set("simd:" + to_lower(simd::name(is)))) {
364
            return strategy::skip;
×
365
          }
366
        }
×
367

×
368
        return strategy::benchmark;
369
      }
×
370
    }
×
371

×
372
    return strategy::skip;
373
  }
374

375
private:
×
376
  const std::string m_key;
×
377
  const simd::instruction_set m_is;
×
378
};
×
379

×
380
/** Benchmarking of SE alignments */
381
class benchmarker_se_alignment : public alignment_benchmarker
382

383
{
×
384
public:
385
  benchmarker_se_alignment(const userconfig& config,
386
                           const std::vector<fastq>& reads,
387
                           const simd::instruction_set is)
388
    : alignment_benchmarker("se", is)
389
    , m_config(config)
390
    , m_reads(reads)
391
    , m_adapters(config.samples->adapters())
392
    , m_aligner(m_adapters, is)
393
  {
394
  }
395

396
protected:
397
  void execute() override
398
  {
399
    alignment_info best;
400

×
401
    for (const auto& read : m_reads) {
402
      const alignment_info alignment =
403
        m_aligner.align_single_end(read, m_config.shift);
×
404

×
405
      if (alignment.score() > best.score()) {
×
406
        best = alignment;
×
407
      }
×
408
    }
409

410
    blackbox(best);
411
  }
412

×
413
private:
414
  const userconfig& m_config;
×
415
  const std::vector<fastq>& m_reads;
416
  const adapter_set m_adapters;
×
417
  sequence_aligner m_aligner;
×
418
};
×
419

420
/** Benchmarking of PE alignments */
×
421
class pe_alignment_benchmarker : public alignment_benchmarker
×
422
{
423
public:
424
  pe_alignment_benchmarker(const userconfig& config,
NEW
425
                           const std::vector<fastq>& mate_1,
×
426
                           const std::vector<fastq>& mate_2,
427
                           const simd::instruction_set is)
428
    : alignment_benchmarker("pe", is)
429
    , m_config(config)
430
    , m_mate_1(mate_1)
431
    , m_mate_2(mate_2)
432
    , m_adapters(config.samples->adapters())
433
    , m_aligner(m_adapters, is)
434
  {
435
  }
436

437
protected:
438
  void setup() override
439
  {
×
440
    if (m_mate_2_reversed.empty()) {
441
      m_mate_2_reversed = m_mate_2;
442

443
      // Done as part of the alignment loop but is benchmarked separately
×
444
      for (auto& it : m_mate_2_reversed) {
×
445
        it.reverse_complement();
×
446
      }
×
447
    }
×
448
  }
×
449

450
  void execute() override
451
  {
452
    AR_REQUIRE(m_mate_1.size() == m_mate_2_reversed.size());
453

×
454
    alignment_info best;
455

×
456
    auto it_1 = m_mate_1.begin();
×
457
    auto it_2 = m_mate_2_reversed.begin();
458
    while (it_1 != m_mate_1.end()) {
459
      const fastq& read_1 = *it_1++;
×
460
      const fastq& read_2 = *it_2++;
×
461

462
      const alignment_info alignment =
463
        m_aligner.align_paired_end(read_1, read_2, m_config.shift);
464

465
      if (alignment.score() > best.score()) {
×
466
        best = alignment;
467
      }
×
468
    }
469

×
470
    blackbox(best);
471
  }
×
472

×
473
private:
×
474
  const userconfig& m_config;
×
NEW
475
  const std::vector<fastq>& m_mate_1;
×
476
  const std::vector<fastq>& m_mate_2;
NEW
477
  std::vector<fastq> m_mate_2_reversed{};
×
UNCOV
478
  adapter_set m_adapters;
×
479
  sequence_aligner m_aligner;
480
};
×
481

×
482
string_vec
483
supported_toggles()
484
{
485
  string_vec toggles = { "read",  "parse",      "revcompl",  "complexity",
×
486
                         "trim",  "trim:basic", "trim:mott", "trim:window",
487
                         "stats", "align",      "align:se",  "align:pe",
488
                         "simd" };
489

490
  for (const auto is : simd::supported()) {
491
    toggles.push_back(std::string("simd:") + to_lower(simd::name(is)));
492
  }
493

494
  return toggles;
495
}
496

497
} // namespace
498

×
499
int
500
benchmark(const userconfig& config)
×
501
{
502
  auto head = config.head;
503
  // Limit benchmarking to a reasonable data-set size by default
×
504
  if (config.head == std::numeric_limits<uint64_t>::max()) {
505
    log::warn() << "Defaulting to reading at most 1,000,000 reads/mate pairs";
×
506
    head = 1000000;
×
507
  }
508

509
  // Parse user-specified benchmarking toggles
×
510
  benchmark_toggles toggles(supported_toggles());
×
511
  if (!toggles.update_toggles(config.benchmarks)) {
512
    return 1;
513
  }
514

515
  readlines_benchmarker lines{ config.input_files_1,
×
516
                               config.input_files_2,
517
                               head };
×
518
  lines.run_if_toggled(toggles);
519

×
520
  fastq_parser_benchmarker records{ lines.lines_1(), lines.lines_2() };
×
521
  records.run_if_toggled(toggles);
×
522

523
  reverse_complement_benchmarker(records.records_1(), records.records_2())
524
    .run_if_toggled(toggles);
525

×
526
  complexity_benchmarker(records.records_1(), records.records_2())
×
527
    .run_if_toggled(toggles);
528

529
  basic_trimming_benchmarker(records.records_1(), records.records_2())
530
    .run_if_toggled(toggles);
×
531

×
532
  mott_trimming_benchmarker(records.records_1(), records.records_2())
×
533
    .run_if_toggled(toggles);
×
534

535
  window_trimming_benchmarker(records.records_1(), records.records_2())
×
536
    .run_if_toggled(toggles);
×
537

538
  fastq_statistics_benchmarker(records.records_1(), records.records_2())
×
539
    .run_if_toggled(toggles);
×
540

541
  for (const auto is : simd::supported()) {
×
542
    benchmarker_se_alignment(config, records.records_1(), is)
×
543
      .run_if_toggled(toggles);
544
  }
×
545

×
546
  if (config.paired_ended_mode) {
547
    for (const auto is : simd::supported()) {
×
548
      pe_alignment_benchmarker(config,
×
549
                               records.records_1(),
550
                               records.records_2(),
×
551
                               is)
×
552
        .run_if_toggled(toggles);
553
    }
×
554
  }
×
555

556
  return 0;
×
557
}
×
558

×
559
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc