• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #91

13 Apr 2025 11:50AM UTC coverage: 27.932% (+0.8%) from 27.089%
#91

push

travis-ci

web-flow
rework initializer_list constructor for sample_set (#118)

This is to avoid having two ways to construct sets, one of which was only
used for testing purposes. By using the same code everywhere, test
coverage is naturally increased.

12 of 15 new or added lines in 3 files covered. (80.0%)

3 existing lines in 1 file now uncovered.

2808 of 10053 relevant lines covered (27.93%)

4011.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

72.09
/src/sequence_sets.hpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2011 Stinus Lindgreen <stinus@binf.ku.dk>
3
// SPDX-FileCopyrightText: 2014 Mikkel Schubert <mikkelsch@gmail.com>
4
#pragma once
5

6
#include "sequence.hpp"     // for dna_sequence
7
#include <cstddef>          // for size_t
8
#include <initializer_list> // for initializer_list
9
#include <string>           // for string
10
#include <string_view>      // for string_view
11
#include <utility>          // for move
12
#include <vector>           // for vector
13

14
namespace adapterremoval {
15

16
class line_reader_base;
17

18
using string_view_pair = std::pair<std::string_view, std::string_view>;
19

20
/** Contains SAM/BAM read-group information */
21
class read_group
72✔
22
{
23
public:
24
  read_group();
25

26
  /**
27
   * Parses a read-group string in the form "ID:1\tSM:sample" (optionally
28
   * including a leading "@RG\t"). Throws std::invalid_argument if the value
29
   * is invalid.
30
   */
31
  explicit read_group(std::string_view value);
32

33
  /** Returns the read-group ID for use in per-read 'RG' tags */
34
  [[nodiscard]] std::string_view id() const { return m_id; }
16✔
35

36
  /** Returns the full @RG header, not including a trailing new-line */
37
  [[nodiscard]] std::string_view header() const { return m_header; }
30✔
38

39
  /** Adds/replaces the read-group ID (ID) tag */
40
  void set_id(std::string_view id);
41

42
  /** Adds/replaces the sample (SM) tag */
43
  void set_sample(std::string_view name) { update_tag("SM", name); }
73✔
44

45
  /** Adds/replaces the barcode (BC) tag */
46
  void set_barcodes(std::string_view value) { update_tag("BC", value); }
65✔
47

48
  /** Adds/replaces the description (DS) tag */
49
  void set_description(std::string_view value) { update_tag("DS", value); }
4✔
50

51
private:
52
  /** Updates or adds the specified tag; sets `m_id` if key is `ID` */
53
  void update_tag(std::string_view key, std::string_view value);
54

55
  //! The full read_group header, including leading `@RG\t`
56
  std::string m_header{};
57
  //! Value mapping reads (via `RG:Z:${ID}`) to the @RG header
58
  std::string m_id{};
59
};
60

61
/**
62
 * Class for loading/handling adapter sequences.
63
 *
64
 * Adapter sequences are found in one of two orientations:
65
 *  - Read orientation, corresponding to the sequence in input fastq reads
66
 *  - Alignment orientation, corresponding to the orientation used during
67
 *    sequence alignment. For the mate 1 adapter, this is read orientation,
68
 *    but for the mate 2 adapter this is the reverse complement.
69
 */
70
class adapter_set
257✔
71
{
72
public:
73
  /** Initialize empty adapter list. */
74
  adapter_set() = default;
236✔
75

76
  /** Initializes with adapters in read orientation */
77
  adapter_set(std::initializer_list<string_view_pair> args);
78

79
  /** Adds a pair of adapters to the set in read orientation */
80
  void add(dna_sequence adapter1, dna_sequence adapter2);
81

82
  /** Adds a pair of adapters to the set in read orientation */
83
  void add(std::string adapter1, std::string adapter2);
84

85
  /** Generate new adapter set with these barcodes (in read orientation) */
86
  [[nodiscard]] adapter_set add_barcodes(const dna_sequence& barcode1,
87
                                         const dna_sequence& barcode2) const;
88

89
  /**
90
   * Loads adapters in read orientation from a TSV file, throwing on failure.
91
   * Two adapter sequences are expected if 'paired_end_mode' is set.
92
   */
93
  void load(const std::string& filename, bool paired_end_mode);
94

95
  /** Returns the number of adapters/adapter pairs added/loaded */
96
  [[nodiscard]] size_t size() const { return m_adapters.size(); }
×
97

98
  /** Iterator over adapter sequences in alignment orientation */
99
  [[nodiscard]] auto begin() const { return m_adapters.begin(); }
240✔
100

101
  /** Terminal iterator over adapter sequences in alignment orientation */
102
  [[nodiscard]] auto end() const { return m_adapters.end(); }
240✔
103

104
  [[nodiscard]] const auto& at(size_t n) const { return m_adapters.at(n); }
105

106
  /** Returns the adapters in read orientation */
107
  [[nodiscard]] sequence_pair_vec to_read_orientation() const;
108

109
private:
110
  //! Adapter sequences in alignment orientation
111
  sequence_pair_vec m_adapters{};
112
};
113

114
/** Represents sequences used for identifying/processing a sample */
115
struct sample_sequences
116
{
117
  sample_sequences() = default;
6✔
118

119
  sample_sequences(dna_sequence barcode1, dna_sequence barcode2)
127✔
120
    : barcode_1(std::move(barcode1))
127✔
121
    , barcode_2(std::move(barcode2))
381✔
122
  {
123
  }
127✔
124

125
  //! Whether read groups are specified for this set of sequences
126
  bool has_read_group{};
127
  //! Read-group for this sample/barcode combination
128
  read_group info{};
129
  //! Barcode expected to be found in mate 1 reads, if any (read orientation)
130
  dna_sequence barcode_1{};
131
  //! Barcode expected to be found in mate 2 reads, if any (read orientation)
132
  dna_sequence barcode_2{};
133
  //! Adapter set with the above barcodes added
134
  adapter_set adapters{};
135
};
136

137
/** Represents a demultiplexing sample with one or more barcodes */
138
class sample
248✔
139
{
140
public:
141
  sample() { add(dna_sequence{}, dna_sequence{}); }
252✔
142

143
  explicit sample(std::string name,
85✔
144
                  dna_sequence barcode1,
145
                  dna_sequence barcode2)
146
    : m_name(std::move(name))
85✔
147
  {
148
    add(std::move(barcode1), std::move(barcode2));
425✔
149
  };
85✔
150

151
  explicit sample(std::string name, std::string barcode1, std::string barcode2)
152
    : sample(name, dna_sequence{ barcode1 }, dna_sequence{ barcode2 }) {};
153

154
  /** Adds a pair of barcodes in read orientation */
155
  void add(dna_sequence barcode1, dna_sequence barcode2);
156

157
  /** Adds barcodes in read orientation */
158
  void add(std::string barcode1, std::string barcode2);
159

160
  /** Assigns adapter sequences for each pair of barcodes */
161
  void set_adapters(const adapter_set& adapters);
162

163
  /** Assigns read groups for each pair of barcodes */
164
  void set_read_group(const read_group& info);
165

166
  /** Returns the unique name of this sample */
167
  [[nodiscard]] const auto& name() const { return m_name; }
191✔
168

169
  /** Returns the number of barcode sequences loaded */
170
  [[nodiscard]] size_t size() const { return m_barcodes.size(); }
×
171

172
  /** Iterator over the barcodes / barcode pairs loaded for this sample */
173
  [[nodiscard]] auto begin() const { return m_barcodes.begin(); }
296✔
174

175
  /** Terminal iterator over the barcodes / barcode pairs for this sample */
176
  [[nodiscard]] auto end() const { return m_barcodes.end(); }
296✔
177

178
  /** Returns the nth barcode / pair of barcodes */
179
  [[nodiscard]] const auto& at(size_t n) const { return m_barcodes.at(n); }
×
180

181
private:
182
  //! Unique name associated with this sample
183
  std::string m_name{};
184
  //! Barcodes identifying this sample
185
  std::vector<sample_sequences> m_barcodes{};
186
};
187

188
/** Configuration for loading of barcode tables */
189
class barcode_config
190
{
191
public:
192
  barcode_config() = default;
42✔
193

194
  /**
195
   * If PE mode is enabled, barcode 1 and 2 together must be unique, otherwise
196
   * barcode 1 sequences alone must be unique to allow unambiguous
197
   * identification of samples
198
   */
199
  auto& paired_end_mode(bool value = true)
12✔
200
  {
201
    m_paired_end_mode = value;
12✔
202
    return *this;
12✔
203
  }
204

205
  /** Specifies if barcodes are expected in one or both orientations */
NEW
206
  auto& unidirectional_barcodes(bool value = true)
×
207
  {
208
    m_unidirectional_barcodes = value;
×
209
    return *this;
×
210
  }
211

212
  /** Enable or disable support for multiple barcodes for the same sample */
NEW
213
  auto& allow_multiple_barcodes(bool value = true)
×
214
  {
215
    m_allow_multiple_barcodes = value;
×
216
    return *this;
×
217
  }
218

219
private:
220
  friend class sample_set;
221

222
  //! Whether running in paired or single end mode; is used to determine whether
223
  //! or not samples can be uniquely identified from the barcodes provided
224
  bool m_paired_end_mode = false;
225
  //! Indicates if multiple barcodes/barcode pairs are allowed per sample
226
  bool m_allow_multiple_barcodes = false;
227
  //! Indicates if barcode pairs are expected in a single orientation only
228
  bool m_unidirectional_barcodes = true;
229
};
230

231
/**
232
 * Class for handling samples for demultiplexing. The class further checks for
233
 * the correctness of these sequences, and detects duplicate barcode sequences /
234
 * pairs of sequences.
235
 */
236
class sample_set
237
{
238
public:
239
  /** Creates sample set with single unnamed sample with empty barcodes */
240
  sample_set();
241
  /** Creates sample set from lines representing a barcode table */
242
  sample_set(std::initializer_list<std::string_view> lines,
243
             barcode_config config = {});
244

245
  /** Sets adapter sequences for all samples */
246
  void set_adapters(adapter_set adapters);
247

248
  /** Sets sample read groups from a string parsed by `read_group` */
249
  void set_read_group(std::string_view value);
250

251
  /** Clears existing samples and loads barcodes from a TSV file */
252
  void load(const std::string& filename, const barcode_config& config);
253
  /** Clears existing samples and loads barcodes from a TSV file */
254
  void load(line_reader_base& reader,
255
            const barcode_config& config,
256
            const std::string& filename);
257

258
  /** Convenience function to get sequences for sample / barcode pair */
259
  [[nodiscard]] const auto& get_sequences(const size_t sample,
260
                                          const size_t barcodes) const
261
  {
262
    return m_samples.at(sample).at(barcodes);
263
  }
264

265
  /** Returns the number of (demultiplexing) samples */
266
  [[nodiscard]] size_t size() const { return m_samples.size(); }
31✔
267

268
  /** Iterator over (demultiplexing) samples */
269
  [[nodiscard]] auto begin() const { return m_samples.begin(); }
62✔
270

271
  /** Terminal iterator over (demultiplexing) samples */
272
  [[nodiscard]] auto end() const { return m_samples.end(); }
62✔
273

274
  /** Returns the nth (demultiplexing) sample */
275
  [[nodiscard]] const auto& at(size_t n) const { return m_samples.at(n); }
×
276

277
  /** Returns the original, user-supplied adapter sequences */
278
  [[nodiscard]] const adapter_set& adapters() const { return m_adapters; }
×
279

280
  /** Returns special sample representing unidentified reads */
281
  [[nodiscard]] const auto& unidentified() const { return m_unidentified; }
×
282

283
private:
284
  /** Sets read-group for unidentified reads */
285
  void set_unidentified_read_group(read_group tmpl);
286

287
  /** Adds the reverse complement of barcodes for all samples, if missing */
288
  void add_reversed_barcodes(const barcode_config& config);
289

290
  //! Demultiplexing samples. Names and barcode pairs are both unique
291
  std::vector<sample> m_samples{};
292
  //! Special sample representing unidentified reads
293
  sample m_unidentified{};
294
  //! User-supplied read group used to generate per-sample read-groups
295
  read_group m_read_group{};
296
  //! User-supplied adapter sequences used to generate per-barcode adapters
297
  adapter_set m_adapters{};
298
};
299

300
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc