• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #46

27 Nov 2024 03:10PM UTC coverage: 27.245% (+1.0%) from 26.244%
#46

push

travis-ci

MikkelSchubert
fix convenience executable make target

2609 of 9576 relevant lines covered (27.25%)

4268.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

58.14
/src/sequence_sets.hpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2011 by Stinus Lindgreen - stinus@binf.ku.dk            *
5
 * Copyright (C) 2014 by Mikkel Schubert - mikkelsch@gmail.com           *
6
 *                                                                       *
7
 * If you use the program, please cite the paper:                        *
8
 * Schubert et al. (2016). AdapterRemoval v2: rapid adapter trimming,    *
9
 * identification, and read merging. BMC Research Notes, 12;9(1):88      *
10
 * https://doi.org/10.1186/s13104-016-1900-2                             *
11
 *                                                                       *
12
 * This program is free software: you can redistribute it and/or modify  *
13
 * it under the terms of the GNU General Public License as published by  *
14
 * the Free Software Foundation, either version 3 of the License, or     *
15
 * (at your option) any later version.                                   *
16
 *                                                                       *
17
 * This program is distributed in the hope that it will be useful,       *
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
20
 * GNU General Public License for more details.                          *
21
 *                                                                       *
22
 * You should have received a copy of the GNU General Public License     *
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
24
\*************************************************************************/
25
#pragma once
26

27
#include "sequence.hpp"     // for dna_sequence
28
#include <cstddef>          // for size_t
29
#include <initializer_list> // for initializer_list
30
#include <string>           // for string
31
#include <string_view>      // for string_view
32
#include <vector>           // for vector
33

34
namespace adapterremoval {
35

36
/** Pair of non-owning string views; element type of adapter_set's initializer-list constructor */
using string_view_pair = std::pair<std::string_view, std::string_view>;
37

38
/** Contains SAM/BAM read-group information */
39
class read_group
78✔
40
{
41
public:
42
  read_group();
43

44
  /**
45
   * Parses a read-group string in the form "ID:1\tSM:sample" (optionally
46
   * including a leading "@RG\t"). Throws std::invalid_argument if the value
47
   * is invalid.
48
   */
49
  explicit read_group(std::string_view value);
50

51
  /** Returns the read-group ID for use in per-read 'RG' tags; the view refers to `m_id` and must not outlive or span a modification of this object */
52
  [[nodiscard]] std::string_view id() const { return m_id; }
48✔
53

54
  /** Returns the full @RG header, not including a trailing new-line; the view refers to `m_header` and must not outlive or span a modification of this object */
55
  [[nodiscard]] std::string_view header() const { return m_header; }
60✔
56

57
  /** Adds/replaces the read-group ID (ID) tag */
58
  void set_id(std::string_view id) { update_tag("ID", id); }
12✔
59

60
  /** Adds/replaces the sample (SM) tag */
61
  void set_sample(std::string_view name) { update_tag("SM", name); }
12✔
62

63
  /** Adds/replaces the barcode (BC) tag */
64
  void set_barcodes(std::string_view value) { update_tag("BC", value); }
×
65

66
private:
67
  /** Updates or adds the specified tag; sets `m_id` if key is `ID` */
68
  void update_tag(std::string_view key, std::string_view value);
69

70
  //! The full read_group header, including leading `@RG\t`
71
  std::string m_header{};
72
  //! Value mapping reads (via `RG:Z:${ID}`) to the @RG header
73
  std::string m_id{};
74
};
75

76
/**
77
 * Class for loading/handling adapter sequences.
78
 *
79
 * Adapter sequences are found in one of two orientations:
80
 *  - Read orientation, corresponding to the sequence in input fastq reads
81
 *  - Alignment orientation, corresponding to the orientation used during
82
 *    sequence alignment. For the mate 1 adapter, this is read orientation,
83
 *    but for the mate 2 adapter this is the reverse complement.
84
 */
85
class adapter_set
125✔
86
{
87
public:
88
  /** Initialize empty adapter list. */
89
  adapter_set() = default;
198✔
90

91
  /** Initializes with adapters in read orientation */
92
  adapter_set(std::initializer_list<string_view_pair> args);
93

94
  /** Adds a pair of adapters to the set in read orientation */
95
  void add(dna_sequence adapter1, dna_sequence adapter2);
96

97
  /** Adds a pair of adapters, given as plain strings, to the set in read orientation */
98
  void add(std::string adapter1, std::string adapter2);
99

100
  /** Generate new adapter set with these barcodes (in read orientation); const, so this set is not modified */
101
  [[nodiscard]] adapter_set add_barcodes(const dna_sequence& barcode1,
102
                                         const dna_sequence& barcode2) const;
103

104
  /**
105
   * Loads adapters in read orientation from a TSV file, throwing on failure.
106
   * Two adapter sequences are expected if 'paired_end_mode' is set.
107
   */
108
  void load(const std::string& filename, bool paired_end_mode);
109

110
  /** Returns the number of adapters/adapter pairs added/loaded */
111
  [[nodiscard]] size_t size() const { return m_adapters.size(); }
×
112

113
  /** Iterator over adapter sequences in alignment orientation */
114
  [[nodiscard]] auto begin() const { return m_adapters.begin(); }
240✔
115

116
  /** Terminal iterator over adapter sequences in alignment orientation */
117
  [[nodiscard]] auto end() const { return m_adapters.end(); }
240✔
118

119
  /** Returns the nth adapter / pair of adapters in alignment orientation, via `m_adapters.at(n)` (presumably bounds-checked; confirm against sequence_pair_vec) */
  [[nodiscard]] const auto& at(size_t n) const { return m_adapters.at(n); }
120

121
  /** Returns the adapters in read orientation */
122
  [[nodiscard]] sequence_pair_vec to_read_orientation() const;
123

124
private:
125
  //! Adapter sequences in alignment orientation
126
  sequence_pair_vec m_adapters{};
127
};
128

129
/** Represents sequences used for identifying/processing a sample */
130
struct sample_sequences
131
{
132
  /** Creates an instance with all members default-initialized */
  sample_sequences() = default;
6✔
133

134
  /** Takes ownership of a barcode pair; `info` and `adapters` keep their defaults */
  sample_sequences(dna_sequence barcode1, dna_sequence barcode2)
118✔
135
    : barcode_1(std::move(barcode1))
118✔
136
    , barcode_2(std::move(barcode2))
354✔
137
  {
138
  }
118✔
139

140
  //! Read-group for this sample/barcode combination
141
  read_group info{};
142
  //! Barcode expected to be found in mate 1 reads, if any (read orientation)
143
  dna_sequence barcode_1{};
144
  //! Barcode expected to be found in mate 2 reads, if any (read orientation)
145
  dna_sequence barcode_2{};
146
  //! Adapter set with the above barcodes added
147
  adapter_set adapters{};
148
};
149

150
/** Represents a demultiplexing sample with one or more barcodes */
151
class sample
436✔
152
{
153
public:
154
  /** Creates an unnamed sample and adds one pair of empty barcodes */
  sample() { add(dna_sequence{}, dna_sequence{}); }
×
155

156
  /** Creates a named sample and adds one pair of barcodes (read orientation) */
  explicit sample(std::string name,
118✔
157
                  dna_sequence barcode1,
158
                  dna_sequence barcode2)
159
    : m_name(std::move(name))
118✔
160
  {
161
    add(std::move(barcode1), std::move(barcode2));
590✔
162
  }
118✔
163

164
  /** As above, but constructs the barcodes from plain strings */
  explicit sample(std::string name, std::string barcode1, std::string barcode2)
165
    // `name` is a by-value sink parameter; move it on instead of copying it
    : sample(std::move(name),
             dna_sequence{ barcode1 },
             dna_sequence{ barcode2 })
  {
  }
166

167
  /** Adds a pair of barcodes in read orientation */
168
  void add(dna_sequence barcode1, dna_sequence barcode2);
169

170
  /** Adds barcodes in read orientation */
171
  void add(std::string barcode1, std::string barcode2);
172

173
  /** Assigns adapter sequences for each pair of barcodes */
174
  void set_adapters(const adapter_set& adapters);
175

176
  /** Assigns read groups for each pair of barcodes */
177
  void set_read_group(const read_group& info);
178

179
  /** Returns the unique name of this sample */
180
  [[nodiscard]] const auto& name() const { return m_name; }
539✔
181

182
  /** Returns the number of barcode sequences loaded */
183
  [[nodiscard]] size_t size() const { return m_barcodes.size(); }
×
184

185
  /** Iterator over the barcodes / pairs of barcodes of this sample */
186
  [[nodiscard]] auto begin() const { return m_barcodes.begin(); }
284✔
187

188
  /** Terminal iterator over the barcodes / pairs of barcodes of this sample */
189
  [[nodiscard]] auto end() const { return m_barcodes.end(); }
284✔
190

191
  /** Returns the nth barcode / pair of barcodes; throws std::out_of_range if n >= size() */
192
  [[nodiscard]] const auto& at(size_t n) const { return m_barcodes.at(n); }
×
193

194
private:
195
  //! Unique name associated with this sample
196
  std::string m_name{};
197
  //! Barcodes identifying this sample
198
  std::vector<sample_sequences> m_barcodes{};
199
};
200

201
/** Configuration for loading of barcode tables */
202
class barcode_config
203
{
204
public:
205
  /** Creates a configuration with the default values given on the members below */
  barcode_config() = default;
×
206

207
  /**
208
   * If PE mode is enabled, barcode 1 and 2 together must be unique, otherwise
209
   * barcode 1 sequences alone must be unique to allow unambiguous
210
   * identification of samples. Returns *this so that setters can be chained.
211
   */
212
  auto& paired_end_mode(bool value)
×
213
  {
214
    m_paired_end_mode = value;
×
215
    return *this;
×
216
  }
217

218
  /** Specifies if barcodes are expected in one or both orientations; returns *this */
219
  auto& unidirectional_barcodes(bool value)
×
220
  {
221
    m_unidirectional_barcodes = value;
×
222
    return *this;
×
223
  }
224

225
  /** Enable or disable support for multiple barcodes for the same sample; returns *this */
226
  auto& allow_multiple_barcodes(bool value)
×
227
  {
228
    m_allow_multiple_barcodes = value;
×
229
    return *this;
×
230
  }
231

232
private:
233
  //! Grants sample_set access to the settings below; no public getters exist
  friend class sample_set;
234

235
  //! Whether running in paired or single end mode; is used to determine whether
236
  //! or not samples can be uniquely identified from the barcodes provided
237
  bool m_paired_end_mode = false;
238
  //! Indicates if multiple barcodes/barcode pairs are allowed per sample
239
  bool m_allow_multiple_barcodes = false;
240
  //! Indicates if barcodes are expected in a single orientation only. NOTE(review): presumably `false` means barcode pairs can be annealed in both orientations; confirm against sample_set
241
  bool m_unidirectional_barcodes = true;
242
};
243

244
/**
245
 * Class for handling samples for demultiplexing. The class further checks for
246
 * the correctness of these sequences, and detects duplicate barcode sequences /
247
 * pairs of sequences.
248
 */
249
class sample_set
250
{
251
public:
252
  /** Creates barcode set with single unnamed sample with empty barcodes */
253
  sample_set();
254
  /** Creates barcode set from set of samples. Allows multiple barcodes */
255
  sample_set(std::initializer_list<sample> args);
256

257
  /** Sets adapter sequences for all samples */
258
  void set_adapters(adapter_set adapters);
259

260
  /** Sets read group for samples using information parsed using `read_group` */
261
  void set_read_group(std::string_view value);
262

263
  /** Clears existing samples and loads barcodes from a TSV file */
264
  void load(const std::string& filename, const barcode_config& config);
265

266
  /** Convenience function to get sequences for sample / barcode pair; throws std::out_of_range if either index is out of bounds */
267
  [[nodiscard]] const auto& get_sequences(const size_t sample,
268
                                          const size_t barcodes) const
269
  {
270
    return m_samples.at(sample).at(barcodes);
271
  }
272

273
  /** Returns the number of (demultiplexing) samples */
274
  [[nodiscard]] size_t size() const { return m_samples.size(); }
62✔
275

276
  /** Iterator over (demultiplexing) samples */
277
  [[nodiscard]] auto begin() const { return m_samples.begin(); }
62✔
278

279
  /** Terminal iterator over (demultiplexing) samples */
280
  [[nodiscard]] auto end() const { return m_samples.end(); }
62✔
281

282
  /** Returns the nth (demultiplexing) sample; throws std::out_of_range if n >= size() */
283
  [[nodiscard]] const auto& at(size_t n) const { return m_samples.at(n); }
×
284

285
  /** Returns the original, user-supplied adapter sequences */
286
  [[nodiscard]] const adapter_set& adapters() const { return m_adapters; }
×
287

288
  /** Returns special sample representing unidentified reads */
289
  [[nodiscard]] const auto& unidentified() const { return m_unidentified; }
×
290

291
private:
292
  /** Adds the reverse complement of barcodes for all samples, if missing */
293
  void add_reversed_barcodes(const barcode_config& config);
294

295
  //! Demultiplexing samples. Names and barcode pairs are both unique
296
  std::vector<sample> m_samples{};
297
  //! Special sample representing unidentified reads
298
  sample m_unidentified{};
299
  //! User-supplied read group used to generate per-sample read-groups
300
  read_group m_read_group{};
301
  //! User-supplied adapter sequences used to generate per-barcode adapters
302
  adapter_set m_adapters{};
303
};
304

305
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc