• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #73

22 Mar 2025 10:19PM UTC coverage: 27.088% (-0.002%) from 27.09%
#73

push

travis-ci

web-flow
updates to formatting and licensing headers (#95)

* use SPDX headers for licenses

This reduces verbosity and works around an issue with clang-format where
some formatting would not be applied due to the \***\ headers.

* set AllowAllArgumentsOnNextLine and InsertBraces

This results in more consistent formatting using clang-format

18 of 61 new or added lines in 12 files covered. (29.51%)

343 existing lines in 3 files now uncovered.

2601 of 9602 relevant lines covered (27.09%)

4259.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main_benchmark.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2024 Mikkel Schubert <mikkelsch@gmail.com>
3
#include "alignment.hpp"         // for alignment_info, sequence_aligner
4
#include "benchmarking.hpp"      // for benchmarker
5
#include "fastq.hpp"             // for fastq
6
#include "linereader_joined.hpp" // for joined_line_readers
7
#include "logging.hpp"           // for log
8
#include "sequence_sets.hpp"     // for adapter_set
9
#include "statistics.hpp"        // for fastq_statistics
10
#include "strutils.hpp"          // for to_lower
11
#include "userconfig.hpp"        // for userconfig
12
#include <cstdint>               // for size_t
13
#include <vector>                // for vector
14

15
namespace adapterremoval {
16

17
namespace {
18

19
// Attributes used to disable optimization of `blackbox`. Clang and GCC spell
// the attribute differently and the two macros are placed at different
// positions in the declaration, so exactly one of them is non-empty per
// compiler. Both expand to nothing on compilers that are neither Clang nor
// GCC-compatible, so that the GNU-style `__attribute__` syntax is never emitted
// where it is not understood.
#if defined(__clang__)
#define NO_OPTIMIZE_CLANG __attribute__((optnone))
#define NO_OPTIMIZE_GCC
#elif defined(__GNUC__)
#define NO_OPTIMIZE_CLANG
#define NO_OPTIMIZE_GCC __attribute__((optimize("O0")))
#else
#define NO_OPTIMIZE_CLANG
#define NO_OPTIMIZE_GCC
#endif

/**
 * Unoptimized to prevent calculations from being elided by the compiler.
 *
 * The function takes a reference to a value produced by a benchmark and does
 * nothing with it; the value itself is never read or modified.
 */
template<typename T>
void NO_OPTIMIZE_GCC
blackbox(T& /* unused */) NO_OPTIMIZE_CLANG
{
}
33

×
34
class readlines_benchmarker : public benchmarker
35
{
36
public:
×
37
  readlines_benchmarker(string_vec filenames_1,
38
                        string_vec filenames_2,
39
                        const size_t head)
×
40
    : benchmarker("FASTQ reading", { "read" })
41
    , m_filenames_1(std::move(filenames_1))
42
    , m_filenames_2(std::move(filenames_2))
×
43
    , m_head(head)
44
  {
45
    set_required();
×
46
  }
47

48
  const string_vec& lines_1() const { return m_lines_1; }
49

50
  const string_vec& lines_2() const { return m_lines_2; }
51

52
protected:
×
53
  void setup() override
54
  {
55
    m_lines_1 = string_vec();
×
56
    m_lines_2 = string_vec();
×
57
  }
×
58

×
59
  void execute() override
60
  {
×
61
    read_lines(m_filenames_1, m_lines_1);
62
    read_lines(m_filenames_2, m_lines_2);
63

×
64
    blackbox(m_lines_1);
65
    blackbox(m_lines_2);
×
66
  }
67

68
private:
×
69
  void read_lines(const string_vec& filenames, string_vec& lines) const
70
  {
×
71
    joined_line_readers reader(filenames);
×
72
    while (lines.size() / 4 < m_head) {
73
      lines.emplace_back(std::string());
74
      if (!reader.getline(lines.back())) {
×
75
        lines.pop_back();
76
        break;
×
77
      }
×
78
    }
79
  }
×
80

×
81
  string_vec m_filenames_1{};
82
  string_vec m_filenames_2{};
83
  size_t m_head = 0;
84
  // Vector containing files set of lines read
×
85
  string_vec m_lines_1{};
86
  string_vec m_lines_2{};
×
87
};
×
88

×
89
/** Benchmarking of FASTQ parsing excluding file IO */
×
90
class fastq_parser_benchmarker : public benchmarker
×
91
{
×
92
public:
93
  fastq_parser_benchmarker(const string_vec& lines_1, const string_vec& lines_2)
94
    : benchmarker("FASTQ parsing", { "parse" })
95
    , m_lines_1(lines_1)
96
    , m_lines_2(lines_2)
97
  {
98
    set_required();
99
  }
100

101
  const fastq_vec& records_1() const { return m_records_1; }
102

103
  const fastq_vec& records_2() const { return m_records_2; }
104

105
protected:
106
  void setup() override
107
  {
108
    m_records_1 = fastq_vec();
×
109
    m_records_2 = fastq_vec();
×
110
  }
×
111

×
112
  void execute() override
113
  {
×
114
    fastq record;
115
    {
116
      vec_reader reader_1(m_lines_1);
×
117
      while (record.read(reader_1, FASTQ_ENCODING_33)) {
118
        m_records_1.push_back(record);
×
119
      }
120
    }
121

×
122
    {
123
      vec_reader reader_2(m_lines_2);
×
124
      while (record.read(reader_2, FASTQ_ENCODING_33)) {
×
125
        m_records_2.push_back(record);
126
      }
127
    }
×
128

129
    blackbox(m_records_1);
×
130
    blackbox(m_records_2);
×
131
  }
×
132

×
133
private:
×
134
  const string_vec& m_lines_1;
135
  const string_vec& m_lines_2;
136
  fastq_vec m_records_1{};
137
  fastq_vec m_records_2{};
×
138
};
×
139

×
140
class reverse_complement_benchmarker : public benchmarker
×
141
{
142
public:
143
  reverse_complement_benchmarker(fastq_vec records_1, fastq_vec records_2)
144
    : benchmarker("reverse complement", { "revcompl" })
×
145
    , m_records_1(std::move(records_1))
×
146
    , m_records_2(std::move(records_2))
147
  {
148
  }
149

150
protected:
151
  void execute() override
152
  {
153
    for (auto& it : m_records_1) {
154
      it.reverse_complement();
155
    }
156

157
    for (auto& it : m_records_2) {
158
      it.reverse_complement();
×
159
    }
×
160
  }
×
161

×
162
private:
163
  fastq_vec m_records_1{};
164
  fastq_vec m_records_2{};
165
};
166

×
167
class complexity_benchmarker : public benchmarker
168
{
×
169
public:
×
170
  complexity_benchmarker(const fastq_vec& records_1, const fastq_vec& records_2)
171
    : benchmarker("read complexity", { "complexity" })
172
    , m_records_1(records_1)
×
173
    , m_records_2(records_2)
×
174
  {
175
  }
176

177
protected:
178
  void execute() override
179
  {
180
    double total = 0.0;
181
    total += complexity(m_records_1);
182
    total += complexity(m_records_2);
×
183

184
    blackbox(total);
185
  }
×
186

×
187
private:
×
188
  static double complexity(const fastq_vec& records)
×
189
  {
190
    double total = 0.0;
191
    for (const auto& it : records) {
192
      total += it.complexity();
193
    }
×
194

195
    return total;
×
196
  }
×
197

×
198
  const fastq_vec& m_records_1;
199
  const fastq_vec& m_records_2;
×
200
};
201

202
class trimming_benchmarker : public benchmarker
203
{
204
public:
205
  trimming_benchmarker(const std::string& desc,
×
206
                       const std::string& toggle,
×
207
                       const fastq_vec& records_1,
×
208
                       const fastq_vec& records_2)
209
    : benchmarker(desc, { "trim", "trim:" + toggle })
210
    , m_records_1(records_1)
×
211
    , m_records_2(records_2)
212
  {
213
  }
214

215
protected:
216
  void setup() override
217
  {
218
    m_trimmed_records_1 = m_records_1;
219
    m_trimmed_records_2 = m_records_2;
220
  }
×
221

222
  void execute() override
223
  {
224
    trim(m_trimmed_records_1);
×
225
    trim(m_trimmed_records_2);
×
226

×
227
    blackbox(m_trimmed_records_1);
228
    blackbox(m_trimmed_records_2);
229
  }
230

231
  virtual void trim(fastq_vec& reads) const = 0;
×
232

233
private:
×
234
  const fastq_vec& m_records_1;
×
235
  const fastq_vec& m_records_2;
236
  fastq_vec m_trimmed_records_1{};
237
  fastq_vec m_trimmed_records_2{};
×
238
};
239

×
240
class basic_trimming_benchmarker : public trimming_benchmarker
×
241
{
242
public:
×
243
  basic_trimming_benchmarker(const fastq_vec& records_1,
×
244
                             const fastq_vec& records_2)
245
    : trimming_benchmarker("basic trimming", "basic", records_1, records_2)
246
  {
247
  }
248

249
protected:
250
  void trim(fastq_vec& reads) const override
251
  {
252
    for (auto& read : reads) {
253
      read.trim_trailing_bases(true, 2);
254
    }
255
  }
×
256
};
257

258
class mott_trimming_benchmarker : public trimming_benchmarker
×
259
{
260
public:
×
261
  mott_trimming_benchmarker(const fastq_vec& records_1,
262
                            const fastq_vec& records_2)
263
    : trimming_benchmarker("mott trimming", "mott", records_1, records_2)
264
  {
265
  }
×
266

267
protected:
×
268
  void trim(fastq_vec& reads) const override
×
269
  {
270
    for (auto& read : reads) {
271
      read.mott_trimming(0.05);
272
    }
273
  }
×
274
};
275

276
class window_trimming_benchmarker : public trimming_benchmarker
×
277
{
278
public:
×
279
  window_trimming_benchmarker(const fastq_vec& records_1,
280
                              const fastq_vec& records_2)
281
    : trimming_benchmarker("window trimming", "window", records_1, records_2)
282
  {
283
  }
×
284

285
protected:
×
286
  void trim(fastq_vec& reads) const override
×
287
  {
288
    for (auto& read : reads) {
289
      read.trim_windowed_bases(true, 2, 0.1);
290
    }
291
  }
×
292
};
293

294
/** Class for benchmarking collection of `fastq_statistics` */
×
295
class fastq_statistics_benchmarker : public benchmarker
296
{
×
297
public:
298
  fastq_statistics_benchmarker(const fastq_vec& records_1,
299
                               const fastq_vec& records_2)
300
    : benchmarker("read statistics", { "stats" })
301
    , m_records_1(records_1)
×
302
    , m_records_2(records_2)
303
  {
×
304
  }
×
305

306
  void execute() override
307
  {
308
    collect_statistics(m_records_1);
309
    collect_statistics(m_records_2);
310
  }
×
311

312
private:
313
  static void collect_statistics(const fastq_vec& records)
×
314
  {
315
    fastq_statistics stats;
×
316
    for (const auto& it : records) {
×
317
      stats.process(it);
×
318
    }
319

320
    blackbox(stats);
321
  }
×
322

323
  const fastq_vec& m_records_1;
×
324
  const fastq_vec& m_records_2;
×
325
};
326

327
/** Base-class for benchmarking SE/PE alignments */
328
class alignment_benchmarker : public benchmarker
329
{
330
public:
×
331
  alignment_benchmarker(const std::string& key, const simd::instruction_set is)
×
332
    : benchmarker(to_upper(key) + " alignment (" + simd::name(is) + ")", {})
×
333
    , m_key(key)
334
    , m_is(is)
335
  {
×
336
  }
337

338
  strategy enabled(const benchmark_toggles& toggles) const override
339
  {
340
    if (toggles.defaults() || toggles.is_set("align") ||
341
        toggles.is_set("align:" + m_key)) {
342

343
      if (toggles.is_set("simd") ||
344
          toggles.is_set("simd:" + to_lower(simd::name(m_is)))) {
345
        return strategy::benchmark;
346
      }
×
347

×
348
      // Benchmark the preferred algorithm if no algorithms were specified
×
349
      const auto supported = simd::supported();
×
350
      if (!supported.empty() && supported.back() == m_is) {
351
        for (const auto is : supported) {
352
          if (toggles.is_set("simd:" + to_lower(simd::name(is)))) {
353
            return strategy::skip;
×
354
          }
355
        }
×
356

×
357
        return strategy::benchmark;
358
      }
×
359
    }
×
360

×
361
    return strategy::skip;
362
  }
363

364
private:
×
365
  const std::string m_key;
×
366
  const simd::instruction_set m_is;
×
367
};
×
368

×
369
/** Benchmarking of SE alignments */
370
class benchmarker_se_alignment : public alignment_benchmarker
371

372
{
×
373
public:
374
  benchmarker_se_alignment(const userconfig& config,
375
                           const fastq_vec& reads,
376
                           const simd::instruction_set is)
377
    : alignment_benchmarker("se", is)
378
    , m_config(config)
379
    , m_reads(reads)
380
    , m_adapters(config.samples.adapters())
381
    , m_aligner(m_adapters, is)
382
  {
383
  }
384

385
protected:
386
  void execute() override
387
  {
388
    alignment_info best;
389

×
390
    for (const auto& read : m_reads) {
391
      const alignment_info alignment =
392
        m_aligner.align_single_end(read, m_config.shift);
×
393

×
394
      if (alignment.score() > best.score()) {
×
395
        best = alignment;
×
396
      }
×
397
    }
398

399
    blackbox(best);
400
  }
401

×
402
private:
403
  const userconfig& m_config;
×
404
  const fastq_vec& m_reads;
405
  const adapter_set m_adapters;
×
406
  sequence_aligner m_aligner;
×
407
};
×
408

409
/** Benchmarking of PE alignments */
×
410
class pe_alignment_benchmarker : public alignment_benchmarker
×
411
{
412
public:
413
  pe_alignment_benchmarker(const userconfig& config,
414
                           const fastq_vec& mate_1,
×
415
                           const fastq_vec& mate_2,
416
                           const simd::instruction_set is)
417
    : alignment_benchmarker("pe", is)
418
    , m_config(config)
419
    , m_mate_1(mate_1)
420
    , m_mate_2(mate_2)
421
    , m_adapters(config.samples.adapters())
422
    , m_aligner(m_adapters, is)
423
  {
424
  }
425

426
protected:
427
  void setup() override
428
  {
×
429
    if (m_mate_2_reversed.empty()) {
430
      m_mate_2_reversed = m_mate_2;
431

432
      // Done as part of the alignment loop but is benchmarked separately
×
433
      for (auto& it : m_mate_2_reversed) {
×
434
        it.reverse_complement();
×
435
      }
×
436
    }
×
437
  }
×
438

439
  void execute() override
440
  {
441
    AR_REQUIRE(m_mate_1.size() == m_mate_2_reversed.size());
442

×
443
    alignment_info best;
444

×
445
    auto it_1 = m_mate_1.begin();
×
446
    auto it_2 = m_mate_2_reversed.begin();
447
    while (it_1 != m_mate_1.end()) {
448
      const fastq& read_1 = *it_1++;
×
449
      const fastq& read_2 = *it_2++;
×
450

451
      const alignment_info alignment =
452
        m_aligner.align_paired_end(read_1, read_2, m_config.shift);
453

454
      if (alignment.score() > best.score()) {
×
455
        best = alignment;
456
      }
×
457
    }
458

×
459
    blackbox(best);
460
  }
×
461

×
462
private:
×
463
  const userconfig& m_config;
×
464
  const fastq_vec& m_mate_1;
×
465
  const fastq_vec& m_mate_2;
466
  fastq_vec m_mate_2_reversed{};
×
467
  adapter_set m_adapters;
×
468
  sequence_aligner m_aligner;
469
};
×
470

×
471
/**
 * Returns the names of all benchmarking toggles: a fixed list of names
 * followed by one "simd:" toggle (with the name in lower-case) for every
 * instruction set returned by `simd::supported`.
 */
string_vec
supported_toggles()
{
  string_vec names = { "read",  "parse",      "revcompl",  "complexity",
                       "trim",  "trim:basic", "trim:mott", "trim:window",
                       "stats", "align",      "align:se",  "align:pe",
                       "simd" };

  const std::string prefix = "simd:";
  for (const auto is : simd::supported()) {
    names.push_back(prefix + to_lower(simd::name(is)));
  }

  return names;
}
485

486
} // namespace
487

×
488
int
489
benchmark(const userconfig& config)
×
490
{
491
  auto head = config.head;
492
  // Limit benchmarking to a reasonable data-set size by default
×
493
  if (config.head == std::numeric_limits<uint64_t>::max()) {
494
    log::warn() << "Defaulting to reading at most 1,000,000 reads/mate pairs";
×
495
    head = 1000000;
×
496
  }
497

498
  // Parse user-specified benchmarking toggles
×
499
  benchmark_toggles toggles(supported_toggles());
×
500
  if (!toggles.update_toggles(config.benchmarks)) {
501
    return 1;
502
  }
503

504
  readlines_benchmarker lines{ config.input_files_1,
×
505
                               config.input_files_2,
506
                               head };
×
507
  lines.run_if_toggled(toggles);
508

×
509
  fastq_parser_benchmarker records{ lines.lines_1(), lines.lines_2() };
×
510
  records.run_if_toggled(toggles);
×
511

512
  reverse_complement_benchmarker(records.records_1(), records.records_2())
513
    .run_if_toggled(toggles);
514

×
515
  complexity_benchmarker(records.records_1(), records.records_2())
×
516
    .run_if_toggled(toggles);
517

518
  basic_trimming_benchmarker(records.records_1(), records.records_2())
519
    .run_if_toggled(toggles);
×
520

×
521
  mott_trimming_benchmarker(records.records_1(), records.records_2())
×
522
    .run_if_toggled(toggles);
×
523

524
  window_trimming_benchmarker(records.records_1(), records.records_2())
×
525
    .run_if_toggled(toggles);
×
526

527
  fastq_statistics_benchmarker(records.records_1(), records.records_2())
×
528
    .run_if_toggled(toggles);
×
529

530
  for (const auto is : simd::supported()) {
×
531
    benchmarker_se_alignment(config, records.records_1(), is)
×
532
      .run_if_toggled(toggles);
533
  }
×
534

×
535
  if (config.paired_ended_mode) {
536
    for (const auto is : simd::supported()) {
×
NEW
537
      pe_alignment_benchmarker(config,
×
538
                               records.records_1(),
NEW
539
                               records.records_2(),
×
NEW
540
                               is)
×
541
        .run_if_toggled(toggles);
542
    }
×
UNCOV
543
  }
×
544

545
  return 0;
×
UNCOV
546
}
×
547

×
548
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc