• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #64

16 Mar 2025 04:40PM UTC coverage: 27.111% (-0.04%) from 27.151%
#64

push

travis-ci

MikkelSchubert
add basic regression tests for SAM output

2597 of 9579 relevant lines covered (27.11%)

4267.51 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

58.14
/src/sequence_sets.hpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2011 by Stinus Lindgreen - stinus@binf.ku.dk            *
5
 * Copyright (C) 2014 by Mikkel Schubert - mikkelsch@gmail.com           *
6
 *                                                                       *
7
 * If you use the program, please cite the paper:                        *
8
 * Schubert et al. (2016). AdapterRemoval v2: rapid adapter trimming,    *
9
 * identification, and read merging. BMC Research Notes, 12;9(1):88      *
10
 * https://doi.org/10.1186/s13104-016-1900-2                             *
11
 *                                                                       *
12
 * This program is free software: you can redistribute it and/or modify  *
13
 * it under the terms of the GNU General Public License as published by  *
14
 * the Free Software Foundation, either version 3 of the License, or     *
15
 * (at your option) any later version.                                   *
16
 *                                                                       *
17
 * This program is distributed in the hope that it will be useful,       *
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
20
 * GNU General Public License for more details.                          *
21
 *                                                                       *
22
 * You should have received a copy of the GNU General Public License     *
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
24
\*************************************************************************/
25
#pragma once
26

27
#include "sequence.hpp"     // for dna_sequence
28
#include <cstddef>          // for size_t
29
#include <initializer_list> // for initializer_list
30
#include <string>           // for string
31
#include <string_view>      // for string_view
32
#include <vector>           // for vector
33

34
namespace adapterremoval {
35

36
/**
 * Pair of non-owning string views; this is the element type accepted by the
 * initializer-list constructor of `adapter_set`.
 */
using string_view_pair = std::pair<std::string_view, std::string_view>;
37

38
/**
 * Contains SAM/BAM read-group information: the complete `@RG` header line and
 * the read-group ID that is used to tag individual reads.
 */
39
class read_group
183✔
40
{
41
public:
42
  /** Default constructor; defined out-of-line, initial header/ID not shown here */
  read_group();
43

44
  /**
45
   * Parses a read-group string in the form "ID:1\tSM:sample" (optionally
46
   * including a leading "@RG\t"). Throws std::invalid_argument if the value
47
   * is invalid.
48
   */
49
  explicit read_group(std::string_view value);
50

51
  /**
   * Returns the read-group ID for use in per-read 'RG' tags. The returned view
   * refers to a member string, so it is only usable while this object is alive
   * and (presumably) until the ID is next updated.
   */
52
  [[nodiscard]] std::string_view id() const { return m_id; }
32✔
53

54
  /**
   * Returns the full @RG header, not including a trailing new-line. The
   * returned view refers to a member string of this object.
   */
55
  [[nodiscard]] std::string_view header() const { return m_header; }
60✔
56

57
  /** Adds/replaces the read-group identifier (ID) tag */
58
  void set_id(std::string_view id);
59

60
  /** Adds/replaces the sample (SM) tag; forwards to `update_tag` */
61
  void set_sample(std::string_view name) { update_tag("SM", name); }
16✔
62

63
  /** Adds/replaces the barcode (BC) tag; forwards to `update_tag` */
64
  void set_barcodes(std::string_view value) { update_tag("BC", value); }
×
65

66
  /**
   * Adds/replaces the comment (CO) tag; forwards to `update_tag`.
   * NOTE(review): the SAM specification does not appear to list `CO` among the
   * `@RG` tags (`@CO` is a separate header record and `DS` is the read-group
   * description) -- confirm that emitting `CO` inside `@RG` is intended.
   */
67
  void set_comment(std::string_view value) { update_tag("CO", value); }
86✔
68

69
private:
70
  /** Updates or adds the specified tag; sets `m_id` if key is `ID` */
71
  void update_tag(std::string_view key, std::string_view value);
72

73
  //! The full read_group header, including leading `@RG\t`
74
  std::string m_header{};
75
  //! Value mapping reads (via `RG:Z:${ID}`) to the @RG header
76
  std::string m_id{};
77
};
78

79
/**
80
 * Class for loading/handling adapter sequences.
81
 *
82
 * Adapter sequences are found in one of two orientations:
83
 *  - Read orientation, corresponding to the sequence in input fastq reads
84
 *  - Alignment orientation, corresponding to the orientation used during
85
 *    sequence alignment. For the mate 1 adapter, this is read orientation,
86
 *    but for the mate 2 adapter this is the reverse complement.
87
 *
 * Functions that accept adapters take them in read orientation, while the
 * iteration/lookup functions expose them in alignment orientation.
 */
88
class adapter_set
125✔
89
{
90
public:
91
  /** Initialize empty adapter list. */
92
  adapter_set() = default;
198✔
93

94
  /** Initializes with adapter pairs given as string views in read orientation */
95
  adapter_set(std::initializer_list<string_view_pair> args);
96

97
  /** Adds a pair of adapters, given as sequences in read orientation */
98
  void add(dna_sequence adapter1, dna_sequence adapter2);
99

100
  /** Adds a pair of adapters, given as strings in read orientation */
101
  void add(std::string adapter1, std::string adapter2);
102

103
  /**
   * Generate new adapter set with these barcodes (in read orientation); this
   * set is not modified (const member returning a new object).
   */
104
  [[nodiscard]] adapter_set add_barcodes(const dna_sequence& barcode1,
105
                                         const dna_sequence& barcode2) const;
106

107
  /**
108
   * Loads adapters in read orientation from a TSV file, throwing on failure.
109
   * Two adapter sequences are expected if 'paired_end_mode' is set.
110
   */
111
  void load(const std::string& filename, bool paired_end_mode);
112

113
  /** Returns the number of adapters/adapter pairs added/loaded */
114
  [[nodiscard]] size_t size() const { return m_adapters.size(); }
×
115

116
  /** Iterator over adapter sequences in alignment orientation */
117
  [[nodiscard]] auto begin() const { return m_adapters.begin(); }
240✔
118

119
  /** Terminal iterator over adapter sequences in alignment orientation */
120
  [[nodiscard]] auto end() const { return m_adapters.end(); }
240✔
121

  /**
   * Returns the nth adapter pair in alignment orientation; forwards to the
   * `at` function of the underlying container, which determines what happens
   * for an out-of-range index.
   */
122
  [[nodiscard]] const auto& at(size_t n) const { return m_adapters.at(n); }
123

124
  /** Returns the adapters in read orientation */
125
  [[nodiscard]] sequence_pair_vec to_read_orientation() const;
126

127
private:
128
  //! Adapter sequences in alignment orientation
129
  sequence_pair_vec m_adapters{};
130
};
131

132
/**
 * Represents sequences used for identifying/processing a sample: one pair of
 * barcodes together with the read group and adapter set that belong to it.
 */
133
struct sample_sequences
134
{
135
  /** Creates an entry in which every member is value-initialized */
  sample_sequences() = default;
6✔
136

137
  /**
   * Creates an entry for the given barcode pair (moved into place); the
   * read-group flag, read group, and adapter set keep their default values.
   */
  sample_sequences(dna_sequence barcode1, dna_sequence barcode2)
118✔
138
    : barcode_1(std::move(barcode1))
118✔
139
    , barcode_2(std::move(barcode2))
354✔
140
  {
141
  }
118✔
142

143
  //! Whether read groups are specified for this set of sequences
144
  bool has_read_group{};
145
  //! Read-group for this sample/barcode combination
146
  read_group info{};
147
  //! Barcode expected to be found in mate 1 reads, if any (read orientation)
148
  dna_sequence barcode_1{};
149
  //! Barcode expected to be found in mate 2 reads, if any (read orientation)
150
  dna_sequence barcode_2{};
151
  //! Adapter set with the above barcodes added
152
  adapter_set adapters{};
153
};
154

155
/**
 * Represents a demultiplexing sample with one or more barcodes. Each barcode
 * pair is stored as a `sample_sequences` entry.
 */
156
class sample
436✔
157
{
158
public:
159
  /** Creates an unnamed sample holding one pair of default-constructed barcodes */
  sample() { add(dna_sequence{}, dna_sequence{}); }
×
160

161
  /** Creates a named sample with a single pair of barcodes in read orientation */
  explicit sample(std::string name,
118✔
162
                  dna_sequence barcode1,
163
                  dna_sequence barcode2)
164
    : m_name(std::move(name))
118✔
165
  {
166
    add(std::move(barcode1), std::move(barcode2));
590✔
167
  }
118✔
168

169
  /**
   * Creates a named sample from barcodes given as strings; delegates to the
   * `dna_sequence` overload. `name` is a by-value parameter, so it is moved
   * into the delegated constructor rather than copied a second time.
   */
  explicit sample(std::string name, std::string barcode1, std::string barcode2)
170
    : sample(std::move(name),
             dna_sequence{ barcode1 },
             dna_sequence{ barcode2 })
  {
  }
171

172
  /** Adds a pair of barcodes in read orientation */
173
  void add(dna_sequence barcode1, dna_sequence barcode2);
174

175
  /** Adds barcodes, given as strings, in read orientation */
176
  void add(std::string barcode1, std::string barcode2);
177

178
  /** Assigns adapter sequences for each pair of barcodes */
179
  void set_adapters(const adapter_set& adapters);
180

181
  /** Assigns read groups for each pair of barcodes */
182
  void set_read_group(const read_group& info);
183

184
  /** Returns the unique name of this sample */
185
  [[nodiscard]] const auto& name() const { return m_name; }
539✔
186

187
  /** Returns the number of barcode sequences loaded */
188
  [[nodiscard]] size_t size() const { return m_barcodes.size(); }
×
189

190
  /** Iterator over the barcode entries (`sample_sequences`) of this sample */
191
  [[nodiscard]] auto begin() const { return m_barcodes.begin(); }
284✔
192

193
  /** Terminal iterator over the barcode entries of this sample */
194
  [[nodiscard]] auto end() const { return m_barcodes.end(); }
284✔
195

196
  /** Returns the nth barcode / pair of barcodes; uses the checked `vector::at` */
197
  [[nodiscard]] const auto& at(size_t n) const { return m_barcodes.at(n); }
×
198

199
private:
200
  //! Unique name associated with this sample
201
  std::string m_name{};
202
  //! Barcodes identifying this sample
203
  std::vector<sample_sequences> m_barcodes{};
204
};
205

206
/**
 * Configuration for loading of barcode tables. Each setter returns a reference
 * to this object so that calls can be chained. Defaults: single-end mode, one
 * barcode (pair) per sample, unidirectional barcodes.
 */
207
class barcode_config
208
{
209
public:
210
  /** Creates a configuration using the default values listed on the members */
  barcode_config() = default;
×
211

212
  /**
213
   * If PE mode is enabled, barcode 1 and 2 together must be unique, otherwise
214
   * barcode 1 sequences alone must be unique to allow unambiguous
215
   * identification of samples
216
   */
217
  auto& paired_end_mode(bool value)
×
218
  {
219
    m_paired_end_mode = value;
×
220
    return *this;
×
221
  }
222

223
  /** Specifies if barcodes are expected in one or both orientations */
224
  auto& unidirectional_barcodes(bool value)
×
225
  {
226
    m_unidirectional_barcodes = value;
×
227
    return *this;
×
228
  }
229

230
  /** Enable or disable support for multiple barcodes for the same sample */
231
  auto& allow_multiple_barcodes(bool value)
×
232
  {
233
    m_allow_multiple_barcodes = value;
×
234
    return *this;
×
235
  }
236

237
private:
238
  // Grants `sample_set` direct access to the private settings below
  friend class sample_set;
239

240
  //! Whether running in paired or single end mode; is used to determine whether
241
  //! or not samples can be uniquely identified from the barcodes provided
242
  bool m_paired_end_mode = false;
243
  //! Indicates if multiple barcodes/barcode pairs are allowed per sample
244
  bool m_allow_multiple_barcodes = false;
245
  //! Indicates if barcodes are expected in a single orientation only.
  //! NOTE(review): the previous wording ("can be annealed in both
  //! orientations") reads as the opposite of the member name; presumably
  //! `true` means one orientation only -- confirm against `sample_set`.
246
  bool m_unidirectional_barcodes = true;
247
};
248

249
/**
250
 * Class for handling samples for demultiplexing. The class further checks for
251
 * the correctness of these sequences, and detects duplicate barcode sequences /
252
 * pairs of sequences.
253
 */
254
class sample_set
255
{
256
public:
257
  /** Creates barcode set with single unnamed sample with empty barcodes */
258
  sample_set();
259
  /** Creates barcode set from set of samples. Allows multiple barcodes */
260
  sample_set(std::initializer_list<sample> args);
261

262
  /** Sets adapter sequences for all samples */
263
  void set_adapters(adapter_set adapters);
264

265
  /** Sets read group for samples using information parsed using `read_group` */
266
  void set_read_group(std::string_view value);
267

268
  /** Clears existing samples and loads barcodes from a TSV file */
269
  void load(const std::string& filename, const barcode_config& config);
270

271
  /**
   * Convenience function to get sequences for sample / barcode pair. Both
   * indices are looked up via `at`, so the lookup is bounds-checked for the
   * sample vector and for the barcodes of the selected sample.
   */
272
  [[nodiscard]] const auto& get_sequences(const size_t sample,
273
                                          const size_t barcodes) const
274
  {
275
    return m_samples.at(sample).at(barcodes);
276
  }
277

278
  /** Returns the number of (demultiplexing) samples */
279
  [[nodiscard]] size_t size() const { return m_samples.size(); }
62✔
280

281
  /** Iterator over (demultiplexing) samples */
282
  [[nodiscard]] auto begin() const { return m_samples.begin(); }
62✔
283

284
  /** Terminal iterator over (demultiplexing) samples */
285
  [[nodiscard]] auto end() const { return m_samples.end(); }
62✔
286

287
  /** Returns the nth (demultiplexing) sample; uses the checked `vector::at` */
288
  [[nodiscard]] const auto& at(size_t n) const { return m_samples.at(n); }
×
289

290
  /** Returns the original, user-supplied adapter sequences */
291
  [[nodiscard]] const adapter_set& adapters() const { return m_adapters; }
×
292

293
  /** Returns special sample representing unidentified reads */
294
  [[nodiscard]] const auto& unidentified() const { return m_unidentified; }
×
295

296
private:
297
  /** Sets read-group for unidentified reads */
298
  void set_unidentified_read_group(read_group tmpl);
299

300
  /** Adds the reverse complement of barcodes for all samples, if missing */
301
  void add_reversed_barcodes(const barcode_config& config);
302

303
  //! Demultiplexing samples. Names and barcode pairs are both unique
304
  std::vector<sample> m_samples{};
305
  //! Special sample representing unidentified reads
306
  sample m_unidentified{};
307
  //! User-supplied read group used to generate per-sample read-groups
308
  read_group m_read_group{};
309
  //! User-supplied adapter sequences used to generate per-barcode adapters
310
  adapter_set m_adapters{};
311
};
312

313
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc