• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #105

25 Apr 2025 08:50AM UTC coverage: 66.927% (-0.03%) from 66.961%
#105

push

travis-ci

web-flow
avoid undefined behavior when stringifying enums (#131)

It is not completely clear to me what is allowed, based on the spec,
so playing it safe

0 of 3 new or added lines in 2 files covered. (0.0%)

2 existing lines in 1 file now uncovered.

9691 of 14480 relevant lines covered (66.93%)

3053.83 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.72
/src/sequence_sets.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2011 Stinus Lindgreen <stinus@binf.ku.dk>
3
// SPDX-FileCopyrightText: 2014 Mikkel Schubert <mikkelsch@gmail.com>
4
#include "sequence_sets.hpp" // declarations
5
#include "debug.hpp"         // for AR_REQUIRE
6
#include "errors.hpp"        // for fastq_error
7
#include "linereader.hpp"    // for line_reader
8
#include "sequence.hpp"      // for dna_sequence
9
#include "strutils.hpp"      // for log_escape
10
#include "table_reader.hpp"  // for table_reader
11
#include "utilities.hpp"     // for underlying_value
12
#include <algorithm>         // for max, sort, find
13
#include <cstddef>           // for size_t
14
#include <initializer_list>  // for initializer_list
15
#include <sstream>           // for operator<<, basic_ostream, ostringstream
16
#include <stdexcept>         // for invalid_argument
17
#include <string>            // for to_string
18
#include <string_view>       // for string_view
19
#include <utility>           // for pair
20
#include <vector>            // for vector, vector<>::const_iterator
21

22
namespace adapterremoval {
23

24
namespace {
25

26
void
27
validate_sample_name(std::string_view name)
188✔
28
{
29
  AR_REQUIRE(!name.empty());
188✔
30
  if (name == "unidentified") {
188✔
31
    throw parsing_error("The sample name 'unidentified' is a reserved "
1✔
32
                        "name, and cannot be used!");
2✔
33
  }
34

35
  for (const char c : name) {
1,765✔
36
    if (!is_ascii_letter_or_digit(c) && (c != '_')) {
1,392✔
37
      std::ostringstream error;
1✔
38
      error << "The sample name " << log_escape(name)
2✔
39
            << " is not a valid sample name; only letters ('a' to 'z' and 'A' "
40
               "to 'Z'), numbers (0 to 9) and underscores (_) are allowed.";
2✔
41

42
      throw parsing_error(error.str());
2✔
43
    }
1✔
44
  }
45
}
186✔
46

47
void
48
validate_barcode_sequence(std::string_view seq,
346✔
49
                          const size_t expected_length,
50
                          const int mate)
51
{
52
  if (seq.find('N') != std::string::npos) {
346✔
53
    std::ostringstream error;
2✔
54
    error << "Degenerate base (N) found in mate " << mate << " barcode "
2✔
55
          << "sequence " << log_escape(seq) << ". Degenerate bases are not "
4✔
56
          << "supported for demultiplexing; please remove before continuing!";
4✔
57

58
    throw parsing_error(error.str());
4✔
59
  }
2✔
60

61
  if (seq.length() != expected_length) {
344✔
62
    std::ostringstream error;
6✔
63
    error << "Inconsistent mate " << mate << " barcode lengths found: Last "
6✔
64
          << "barcode was " << expected_length << " base-pairs long, but "
6✔
65
          << "barcode " << log_escape(seq) << " is " << seq.length() << " "
12✔
66
          << "base-pairs long. Variable length barcodes are not supported";
6✔
67

68
    throw parsing_error(error.str());
12✔
69
  }
6✔
70
}
338✔
71

72
/**
73
 * Checks for multiple barcodes, while allowing that both the forward and
74
 * reverse barcode has been specified for a sample
75
 */
76
void
77
disallow_multiple_barcodes(const sample& s)
142✔
78
{
79
  if (s.size() > 1) {
284✔
80
    if (s.size() == 2) {
12✔
81
      const auto& ss_0 = s.at(0);
6✔
82
      const auto& ss_1 = s.at(1);
6✔
83

84
      // It is allowed to explicitly specify both forward and reverse barcodes
85
      if (ss_0.barcode_1 == ss_1.barcode_2 &&
6✔
86
          ss_0.barcode_2 == ss_1.barcode_1 &&
10✔
87
          ss_0.orientation != ss_1.orientation) {
4✔
88
        // At this point samples should not have been configured further
89
        AR_REQUIRE(ss_0.orientation != barcode_orientation::unspecified);
1✔
90
        AR_REQUIRE(ss_1.orientation != barcode_orientation::unspecified);
1✔
91
        AR_REQUIRE(ss_0.has_read_group == ss_1.has_read_group);
1✔
92
        AR_REQUIRE(ss_0.adapters == ss_1.adapters);
1✔
93
        AR_REQUIRE(ss_0.read_group_ == ss_1.read_group_);
1✔
94
        return;
1✔
95
      }
96
    }
97

98
    std::ostringstream error;
5✔
99
    error << "Duplicate sample name " << log_escape(s.name())
15✔
100
          << "; multiple barcodes per samples is not enabled. Either ensure "
101
             "that all sample names are unique or use --multiple-barcodes to "
102
             "map multiple barcodes to a single sample";
10✔
103

104
    throw parsing_error(error.str());
10✔
105
  }
5✔
106
}
107

108
/** Check that sample names are valid and not overlapping (case-insensitive) */
109
void
110
check_sample_names(const std::vector<sample>& samples)
95✔
111
{
112
  std::vector<std::pair<std::string, std::string>> names;
95✔
113
  for (const auto& sample : samples) {
471✔
114
    const auto& name = sample.name();
188✔
115

116
    validate_sample_name(name);
188✔
117
    names.emplace_back(to_lower(name), name);
558✔
118
  }
119

120
  std::sort(names.begin(), names.end());
279✔
121
  for (size_t i = 1; i < names.size(); ++i) {
179✔
122
    const auto& [key_0, name_0] = names.at(i - 1);
270✔
123
    const auto& [key_1, name_1] = names.at(i);
270✔
124

125
    if ((key_0 == key_1) && (name_0 != name_1)) {
90✔
126
      std::ostringstream error;
4✔
127
      error << "Samples with names " << log_escape(name_0) << " and "
8✔
128
            << log_escape(name_1) << " differ only by case. Either use the "
8✔
129
            << "exact same name for both, if they the same sample, or give "
130
               "them distinct names";
12✔
131

132
      throw parsing_error(error.str());
8✔
133
    }
4✔
134
  }
135
}
184✔
136

137
/** Checks for basic barcode validity, but not for overlapping barcodes */
138
void
139
check_barcode_sequences(const std::vector<sample>& samples,
89✔
140
                        bool allow_multiple_barcodes)
141
{
142
  if (samples.empty()) {
89✔
143
    throw parsing_error("No samples/barcodes found in table");
2✔
144
  }
145

146
  auto mate_1_len = static_cast<size_t>(-1);
88✔
147
  auto mate_2_len = static_cast<size_t>(-1);
88✔
148

149
  for (const auto& sample : samples) {
425✔
150
    if (!allow_multiple_barcodes) {
174✔
151
      disallow_multiple_barcodes(sample);
142✔
152
    }
153

154
    for (const auto& it : sample) {
675✔
155
      if (mate_1_len == static_cast<size_t>(-1)) {
176✔
156
        mate_1_len = it.barcode_1.length();
83✔
157
        mate_2_len = it.barcode_2.length();
166✔
158
      }
159

160
      validate_barcode_sequence(it.barcode_1, mate_1_len, 1);
352✔
161
      validate_barcode_sequence(it.barcode_2, mate_2_len, 2);
340✔
162
    }
163
  }
164
}
75✔
165

166
/** Helper class used to validate barcodes */
167
struct barcode_key
168
{
169
  std::string_view barcode_1;
170
  std::string_view barcode_2;
171
  barcode_orientation orientation;
172
  std::string_view sample;
173

174
  [[nodiscard]] std::string describe() const
30✔
175
  {
176
    std::ostringstream out;
30✔
177
    out << log_escape(this->sample) << " (" << this->barcode_1;
90✔
178

179
    if (!this->barcode_2.empty()) {
60✔
180
      out << "-" << this->barcode_2;
26✔
181
    }
182

183
    switch (this->orientation) {
30✔
184
      case barcode_orientation::unspecified:
8✔
185
        out << ")";
8✔
186
        break;
187
      case barcode_orientation::forward:
13✔
188
        out << "; forward)";
13✔
189
        break;
190
      case barcode_orientation::reverse:
9✔
191
        out << "; reverse)";
9✔
192
        break;
193
      default:                                  // GCOVR_EXCL_LINE
×
194
        AR_FAIL("invalid barcode_orientation"); // GCOVR_EXCL_LINE
×
195
    }
196

197
    return out.str();
60✔
198
  }
30✔
199

200
  bool operator<(const barcode_key& other) const
297✔
201
  {
202
    // Sorted by barcodes first to allow easy duplicate checks
203
    if (this->barcode_1 != other.barcode_1) {
594✔
204
      return this->barcode_1 < other.barcode_1;
506✔
205
    } else if (this->barcode_2 != other.barcode_2) {
88✔
206
      return this->barcode_2 < other.barcode_2;
16✔
207
    } else if (this->sample != other.sample) {
72✔
208
      // Sorted by sample name after barcodes for more sensible looking output
209
      // when overlapping sequences are printed
210
      return this->sample < other.sample;
68✔
211
    } else {
212
      return this->orientation < other.orientation;
2✔
213
    }
214
  }
215
};
216

217
/** Checks for overlapping barcodes */
218
void
219
check_barcode_overlap(const std::vector<sample>& samples, bool paired_end)
75✔
220
{
221
  std::vector<barcode_key> sequences;
75✔
222
  for (const auto& sample : samples) {
380✔
223
    for (const auto& it : sample) {
669✔
224
      sequences.emplace_back(barcode_key{ it.barcode_1,
408✔
225
                                          it.barcode_2,
204✔
226
                                          it.orientation,
204✔
227
                                          sample.name() });
408✔
228
    }
229
  }
230

231
  std::sort(sequences.begin(), sequences.end());
225✔
232
  for (size_t i = 1; i < sequences.size(); ++i) {
157✔
233
    const auto& it_0 = sequences.at(i - 1);
97✔
234
    const auto& it_1 = sequences.at(i);
97✔
235

236
    if (it_0.barcode_1 == it_1.barcode_1 &&
97✔
237
        (!paired_end || it_0.barcode_2 == it_1.barcode_2)) {
10✔
238
      std::ostringstream error;
15✔
239
      error << "Sample " << it_0.describe() << " and sample " << it_1.describe()
60✔
240
            << " have overlapping barcodes";
45✔
241

242
      if (it_0.barcode_2 != it_1.barcode_2) {
30✔
243
        error << ". Note that AdapterRemoval cannot distinguish these barcodes "
1✔
244
              << "in single-end mode, even though the second barcodes differ";
1✔
245
      }
246

247
      error << ". Please remove any duplicate entries from the barcode table "
15✔
248
               "before continuing";
15✔
249

250
      throw parsing_error(error.str());
30✔
251
    }
15✔
252
  }
253
}
135✔
254

255
/** Returns the reverse of a user specified orientation */
256
barcode_orientation
257
reverse_orientation(barcode_table_orientation orientation)
75✔
258
{
259
  switch (orientation) {
75✔
260
    case barcode_table_orientation::forward:
261
      return barcode_orientation::reverse;
262
    case barcode_table_orientation::reverse:
263
      return barcode_orientation::forward;
264
    // Cannot be reversed
265
    case barcode_table_orientation::unspecified:
266
    case barcode_table_orientation::explicit_:
267
      return barcode_orientation::unspecified;
268
    default:                                        // GCOVR_EXCL_LINE
×
269
      AR_FAIL("invalid barcode_table_orientation"); // GCOVR_EXCL_LINE
×
270
  }
271
}
272

273
/** Build list of samples from individual barcodes ordered by sample */
274
void
275
append_sample(std::vector<sample>& samples,
284✔
276
              const std::string& name,
277
              const dna_sequence& barcode_1,
278
              const dna_sequence& barcode_2,
279
              barcode_orientation orientation)
280
{
281

282
  if (samples.empty() || samples.back().name() != name) {
457✔
283
    samples.emplace_back(name, barcode_1, barcode_2, orientation);
229✔
284
  } else {
285
    samples.back().add_barcodes(barcode_1, barcode_2, orientation);
275✔
286
  }
287
}
284✔
288

289
/** Updates a vector of barcodes to include the reverse sequences  */
290
void
291
create_reversed_barcodes(std::vector<sample>& samples,
75✔
292
                         barcode_table_orientation orientation)
293
{
294
  const auto rev_orientation = reverse_orientation(orientation);
75✔
295
  if (rev_orientation == barcode_orientation::unspecified) {
75✔
296
    return;
58✔
297
  }
298

299
  // This is inefficient, but easier than trying to sort the list afterwards
300
  std::vector<sample> rsamples;
17✔
301
  for (const auto& sample : samples) {
92✔
302
    for (const auto& seqs : sample) {
165✔
303
      const auto& name = sample.name();
42✔
304
      const auto& barcode_1 = seqs.barcode_1;
42✔
305
      const auto& barcode_2 = seqs.barcode_2;
42✔
306

307
      append_sample(rsamples, name, barcode_1, barcode_2, seqs.orientation);
42✔
308
      // NOLINTNEXTLINE(readability-suspicious-call-argument)
309
      append_sample(rsamples, name, barcode_2, barcode_1, rev_orientation);
42✔
310
    }
311
  }
312

313
  std::swap(samples, rsamples);
17✔
314
}
17✔
315

316
/** Returns expected number of columns for different types of barcode tables */
317
std::pair<int, int>
318
barcode_table_columns(barcode_table_orientation orientation)
106✔
319
{
320
  auto min_columns = 2; // Name and one barcode
106✔
321
  auto max_columns = 3; // Name and two barcodes
106✔
322
  switch (orientation) {
106✔
323
    case barcode_table_orientation::unspecified:
324
      break;
325
    case barcode_table_orientation::forward:
25✔
326
    case barcode_table_orientation::reverse:
25✔
327
      min_columns = 3;
25✔
328
      break;
25✔
329
    case barcode_table_orientation::explicit_:
13✔
330
      min_columns = 4; // Name, two barcodes, and orientation
13✔
331
      max_columns = 4;
13✔
332
      break;
13✔
333
    default:                                  // GCOVR_EXCL_LINE
×
334
      AR_FAIL("invalid barcode_orientation"); // GCOVR_EXCL_LINE
×
335
  }
336

337
  return { min_columns, max_columns };
212✔
338
}
339

340
/** Parse barcode orientation from a table file */
341
barcode_orientation
342
parse_barcode_orientation(std::string_view name, std::string value)
26✔
343
{
344
  value = to_lower(value);
78✔
345
  if (value == "forward" || value == "fwd" || value == "+") {
26✔
346
    return barcode_orientation::forward;
16✔
347
  } else if (value == "reverse" || value == "rev" || value == "-") {
10✔
348
    return barcode_orientation::reverse;
8✔
349
  }
350

351
  throw parsing_error("Invalid barcode orientation for sample " +
8✔
352
                      log_escape(name) + ": " + log_escape(value));
16✔
353
}
354

355
} // namespace
356

357
///////////////////////////////////////////////////////////////////////////////
358

359
/**
 * Parses a barcode table orientation from a string; surrounding whitespace is
 * ignored and the comparison is case-insensitive.
 *
 * @param value One of "forward", "reverse", "explicit", or "unspecified"
 * @throws std::invalid_argument if the value is not recognized; the message
 *         includes the (escaped) rejected value
 */
barcode_table_orientation
parse_table_orientation(std::string_view value)
{
  value = trim_ascii_whitespace(value);
  const auto value_l = to_lower(std::string{ value });

  if (value_l == "forward") {
    return barcode_table_orientation::forward;
  } else if (value_l == "reverse") {
    return barcode_table_orientation::reverse;
  } else if (value_l == "explicit") {
    return barcode_table_orientation::explicit_;
  } else if (value_l == "unspecified") {
    return barcode_table_orientation::unspecified;
  }

  // Report the value as written by the user rather than the lower-case form
  throw std::invalid_argument("invalid barcode table orientation " +
                              log_escape(value));
}
9✔
377

378
///////////////////////////////////////////////////////////////////////////////
379
// Implementations for 'adapter_set' class
380

381
/** Creates an adapter set from pairs of adapter 1 / adapter 2 sequences */
adapter_set::adapter_set(std::initializer_list<string_view_pair> args)
{
  for (const auto& [adapter_1, adapter_2] : args) {
    dna_sequence seq_1{ adapter_1 };
    dna_sequence seq_2{ adapter_2 };

    add(std::move(seq_1), seq_2);
  }
}
147✔
387

388
void
389
adapter_set::add(dna_sequence adapter1, const dna_sequence& adapter2)
169✔
390
{
391
  m_adapters.emplace_back(std::move(adapter1), adapter2.reverse_complement());
338✔
392
}
169✔
393

394
/**
 * Returns a new adapter set in which every stored pair is combined with the
 * given barcodes: the reverse complement of barcode 2 is prepended to the
 * first sequence and barcode 1 is appended to the second sequence.
 */
adapter_set
adapter_set::add_barcodes(const dna_sequence& barcode1,
                          const dna_sequence& barcode2) const
{
  const auto barcode2_rc = barcode2.reverse_complement();

  adapter_set result;
  for (const auto& [adapter_1, adapter_2] : m_adapters) {
    // Add sequences directly in alignment orientation
    result.m_adapters.emplace_back(barcode2_rc + adapter_1,
                                   adapter_2 + barcode1);
  }

  return result;
}
154✔
407

408
void
409
adapter_set::load(const std::string& filename, bool paired_end_mode)
×
410
{
411
  line_reader reader(filename);
×
412
  load(reader, paired_end_mode);
×
413
}
414

415
void
416
adapter_set::load(line_reader_base& reader, bool paired_end_mode)
11✔
417
{
418
  const auto table = table_reader()
33✔
419
                       .with_comment_char('#')
22✔
420
                       .with_min_columns(1 + paired_end_mode)
11✔
421
                       .with_max_columns(2)
11✔
422
                       .parse(reader);
11✔
423

424
  sequence_pair_vec adapters;
7✔
425
  for (const auto& row : table) {
31✔
426
    dna_sequence adapter_1{ row.at(0) };
30✔
427
    dna_sequence adapter_2;
10✔
428
    if (row.size() > 1) {
20✔
429
      adapter_2 = dna_sequence{ row.at(1) };
40✔
430
    }
431

432
    // Convert from read to alignment orientation
433
    adapters.emplace_back(std::move(adapter_1), adapter_2.reverse_complement());
20✔
434
  }
20✔
435

436
  if (adapters.empty()) {
7✔
437
    throw parsing_error("No adapter sequences in table");
4✔
438
  }
439

440
  std::swap(m_adapters, adapters);
5✔
441
}
14✔
442

443
/**
 * Returns a copy of the adapter pairs in which the second sequence of every
 * pair has been reverse complemented.
 */
sequence_pair_vec
adapter_set::to_read_orientation() const
{
  sequence_pair_vec result;

  for (const auto& [adapter_1, adapter_2] : m_adapters) {
    result.emplace_back(adapter_1, adapter_2.reverse_complement());
  }

  return result;
}
×
453

454
bool
455
adapter_set::operator==(const adapter_set& other) const
115✔
456
{
457
  return m_adapters == other.m_adapters;
115✔
458
}
459

460
std::ostream&
461
operator<<(std::ostream& os, const adapter_set& value)
8✔
462
{
463
  os << "adapter_set{[";
8✔
464

465
  bool is_first = true;
8✔
466
  for (const auto& [first, second] : value) {
33✔
467
    if (!is_first) {
3✔
468
      os << ", ";
1✔
469
    }
470

471
    is_first = false;
3✔
472
    os << "pair{first=" << first << ", second=" << second << "}";
3✔
473
  }
474

475
  return os << "]}";
8✔
476
}
477

478
///////////////////////////////////////////////////////////////////////////////
479
// Implementations for 'sample_sequences' class
480

481
/** Creates a set of sequences from a pair of barcodes and their orientation */
sample_sequences::sample_sequences(dna_sequence barcode_1_,
                                   dna_sequence barcode_2_,
                                   barcode_orientation orientation_)
  : barcode_1(std::move(barcode_1_))
  , barcode_2(std::move(barcode_2_))
  , orientation(orientation_)
{
  // All members are set in the initializer list
}
684✔
489

490
bool
491
sample_sequences::operator==(const sample_sequences& other) const
93✔
492
{
493
  return this->has_read_group == other.has_read_group &&
185✔
494
         this->read_group_ == other.read_group_ &&
92✔
495
         this->barcode_1 == other.barcode_1 &&
184✔
496
         this->barcode_2 == other.barcode_2 &&
178✔
497
         this->orientation == other.orientation &&
267✔
498
         this->adapters == other.adapters;
86✔
499
}
500

501
std::ostream&
502
operator<<(std::ostream& os, const sample_sequences& value)
4✔
503
{
504
  return os << "sample_sequences{has_read_group="
4✔
505
            << (value.has_read_group ? "true" : "false")
4✔
506
            << ", read_group=" << value.read_group_
10✔
507
            << ", barcode_1=" << value.barcode_1
4✔
508
            << ", barcode_2=" << value.barcode_2
4✔
509
            << ", orientation=" << value.orientation
4✔
510
            << ", adapters=" << value.adapters << "}";
4✔
511
}
512

513
///////////////////////////////////////////////////////////////////////////////
514
// Implementations for 'sample' class
515

516
sample::sample()
291✔
517
  : sample(std::string{},
582✔
518
           dna_sequence{},
582✔
519
           dna_sequence{},
582✔
520
           barcode_orientation::unspecified)
1,164✔
521
{
522
}
291✔
523

524
/** Creates a named sample with an initial pair of barcodes */
sample::sample(std::string name,
               dna_sequence barcode1,
               dna_sequence barcode2,
               barcode_orientation orientation)
  : m_name(std::move(name))
{
  // Goes through add_barcodes so that the same checks apply to all barcodes
  this->add_barcodes(std::move(barcode1), std::move(barcode2), orientation);
}
583✔
532

533
void
534
sample::add_barcodes(dna_sequence barcode1,
673✔
535
                     dna_sequence barcode2,
536
                     barcode_orientation orientation)
537
{
538
  AR_REQUIRE(barcode2.empty() || !barcode1.empty());
1,671✔
539
  m_barcodes.emplace_back(std::move(barcode1),
672✔
540
                          std::move(barcode2),
541
                          orientation);
542
}
672✔
543

544
void
545
sample::set_adapters(const adapter_set& adapters)
126✔
546
{
547
  for (auto& it : m_barcodes) {
528✔
548
    it.adapters = adapters.add_barcodes(it.barcode_1, it.barcode_2);
450✔
549
  }
550
}
126✔
551

552
void
553
sample::set_read_group(const read_group& read_group_)
220✔
554
{
555
  for (auto it = m_barcodes.begin(); it != m_barcodes.end(); ++it) {
1,218✔
556
    it->read_group_ = read_group_;
279✔
557
    it->has_read_group = true;
279✔
558

559
    if (!m_name.empty()) {
558✔
560
      it->read_group_.set_sample(m_name);
253✔
561

562
      if (m_barcodes.size() > 1) {
253✔
563
        std::string id = m_name;
114✔
564
        id.push_back('.');
114✔
565
        id.append(std::to_string((it - m_barcodes.begin()) + 1));
342✔
566

567
        it->read_group_.set_id(id);
114✔
568
      } else {
114✔
569
        it->read_group_.set_id(m_name);
139✔
570
      }
571
    }
572

573
    if (it->barcode_1.length() || it->barcode_2.length()) {
618✔
574
      std::string barcodes;
249✔
575
      barcodes.append(it->barcode_1);
249✔
576
      if (it->barcode_2.length()) {
498✔
577
        barcodes.push_back('-');
217✔
578
        barcodes.append(it->barcode_2);
217✔
579
      }
580

581
      it->read_group_.set_barcodes(barcodes);
249✔
582
    }
249✔
583

584
    it->read_group_.set_orientation(it->orientation);
279✔
585
  }
586
}
220✔
587

588
bool
589
sample::operator==(const sample& other) const
57✔
590
{
591
  return m_name == other.m_name && m_barcodes == other.m_barcodes;
57✔
592
}
593

594
std::ostream&
595
operator<<(std::ostream& os, const sample& value)
3✔
596
{
597
  return os << "sample{name=" << log_escape(value.name()) << ", barcodes=["
9✔
598
            << join_text(value, ", ") << "]}";
12✔
599
}
600

601
///////////////////////////////////////////////////////////////////////////////
602
// Implementations for 'sample_set' class
603

604
sample_set::sample_set()
10✔
605
  : m_samples{ sample{} }
40✔
606
  , m_unidentified({},
30✔
607
                   dna_sequence{},
30✔
608
                   dna_sequence{},
30✔
609
                   barcode_orientation::unspecified)
10✔
610
{
611
  set_unidentified_read_group(m_read_group);
20✔
612
}
20✔
613

614
/** Creates a sample set from the lines of a barcode table; see load() */
sample_set::sample_set(std::initializer_list<std::string_view> lines,
                       barcode_config config)
{
  vec_reader table_lines(lines);

  this->load(table_lines, config);
}
480✔
620

621
void
622
sample_set::set_adapters(adapter_set adapters)
3✔
623
{
624
  m_adapters = std::move(adapters);
3✔
625
  for (auto& sample : m_samples) {
13✔
626
    sample.set_adapters(m_adapters);
4✔
627
  }
628

629
  m_unidentified.set_adapters(m_adapters);
3✔
630
}
3✔
631

632
void
633
sample_set::set_read_group(std::string_view value)
9✔
634
{
635
  m_read_group = read_group(value);
18✔
636
  for (auto& sample : m_samples) {
39✔
637
    sample.set_read_group(m_read_group);
12✔
638
  }
639

640
  set_unidentified_read_group(m_read_group);
18✔
641
}
9✔
642

643
void
644
sample_set::load(const std::string& filename, const barcode_config& config)
×
645
{
646
  line_reader reader(filename);
×
647
  load(reader, config);
×
648
}
649

650
void
651
sample_set::load(line_reader_base& reader, const barcode_config& config)
106✔
652
{
653
  auto [min_columns, max_columns] = barcode_table_columns(config.m_orientation);
212✔
654

655
  auto barcodes = table_reader()
318✔
656
                    .with_comment_char('#')
212✔
657
                    .with_min_columns(min_columns)
106✔
658
                    .with_max_columns(max_columns)
106✔
659
                    .parse(reader);
106✔
660

661
  // Sort by sample name to simplify sample set construction
662
  std::sort(barcodes.begin(), barcodes.end(), [](const auto& a, const auto& b) {
291✔
663
    return a.at(0) < b.at(0);
630✔
664
  });
665

666
  std::vector<sample> samples;
97✔
667
  for (const auto& row : barcodes) {
491✔
668
    const auto& name = row.at(0);
404✔
669
    const dna_sequence barcode_1{ row.at(1) };
606✔
670
    dna_sequence barcode_2;
202✔
671

672
    if (row.size() > 2) {
404✔
673
      barcode_2 = dna_sequence{ row.at(2) };
815✔
674
    }
675

676
    auto orientation = barcode_orientation::unspecified;
202✔
677
    if (config.m_orientation == barcode_table_orientation::explicit_) {
202✔
678
      orientation = parse_barcode_orientation(name, row.at(3));
78✔
679
    } else {
680
      orientation = static_cast<barcode_orientation>(config.m_orientation);
176✔
681
    }
682

683
    append_sample(samples, name, barcode_1, barcode_2, orientation);
200✔
684
  }
404✔
685

686
  // Check sample names for overlap (case-insensitive) and disallowed characters
687
  check_sample_names(samples);
95✔
688

689
  // Basic properties are checked first, before (potentially) reversing barcodes
690
  check_barcode_sequences(samples, config.m_allow_multiple_barcodes);
89✔
691

692
  // Create reversed barcodes if enabled
693
  create_reversed_barcodes(samples, config.m_orientation);
75✔
694

695
  // Check for overlap between user barcodes, generated barcodes, or both
696
  check_barcode_overlap(samples, config.m_paired_end_mode);
75✔
697

698
  // Update read-group information and generate adapters based on all barcodes
699
  for (auto& s : samples) {
298✔
700
    s.set_read_group(m_read_group);
118✔
701
    s.set_adapters(m_adapters);
118✔
702
  }
703

704
  std::swap(m_samples, samples);
60✔
705
}
194✔
706

707
void
708
sample_set::set_unidentified_read_group(read_group tmpl)
19✔
709
{
710
  // Unidentified reads lack a SM tag, so add a description instead
711
  tmpl.set_sample("");
19✔
712
  tmpl.set_description("unidentified");
19✔
713
  m_unidentified.set_read_group(tmpl);
19✔
714
}
19✔
715

716
std::ostream&
717
operator<<(std::ostream& os, const sample_set& value)
1✔
718
{
719
  return os << "sample_set{samples=[" << join_text(value, ", ") << "]"
2✔
720
            << ", unidentified=" << value.unidentified()
3✔
721
            << ", read_group=" << value.readgroup()
2✔
722
            << ", adapters=" << value.adapters() << "}";
3✔
723
}
724

725
////////////////////////////////////////////////////////////////////////////////
726

727
std::ostream&
728
operator<<(std::ostream& os, const barcode_orientation& value)
7✔
729
{
730
  switch (value) {
7✔
731
    case barcode_orientation::unspecified:
3✔
732
      return os << "barcode_orientation::unspecified";
3✔
733
    case barcode_orientation::forward:
2✔
734
      return os << "barcode_orientation::forward";
2✔
735
    case barcode_orientation::reverse:
2✔
736
      return os << "barcode_orientation::reverse";
2✔
UNCOV
737
    default:
×
NEW
738
      return os << "barcode_orientation{?}";
×
739
  }
740
}
741

742
std::ostream&
743
operator<<(std::ostream& os, const barcode_table_orientation& value)
4✔
744
{
745
  switch (value) {
4✔
746
    case barcode_table_orientation::unspecified:
1✔
747
      return os << "barcode_table_orientation::unspecified";
1✔
748
    case barcode_table_orientation::forward:
1✔
749
      return os << "barcode_table_orientation::forward";
1✔
750
    case barcode_table_orientation::reverse:
1✔
751
      return os << "barcode_table_orientation::reverse";
1✔
752
    case barcode_table_orientation::explicit_:
1✔
753
      return os << "barcode_table_orientation::explicit_";
1✔
UNCOV
754
    default:
×
NEW
755
      return os << "barcode_table_orientation{?}";
×
756
  }
757
}
758

759
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc