• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #45

20 Sep 2024 06:49PM UTC coverage: 26.244% (-49.2%) from 75.443%
#45

push

travis-ci

web-flow
attempt to fix coveralls run

2458 of 9366 relevant lines covered (26.24%)

4362.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

24.39
/src/sequence_sets.hpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2011 by Stinus Lindgreen - stinus@binf.ku.dk            *
5
 * Copyright (C) 2014 by Mikkel Schubert - mikkelsch@gmail.com           *
6
 *                                                                       *
7
 * If you use the program, please cite the paper:                        *
8
 * Schubert et al. (2016). AdapterRemoval v2: rapid adapter trimming,    *
9
 * identification, and read merging. BMC Research Notes, 12;9(1):88      *
10
 * https://doi.org/10.1186/s13104-016-1900-2                             *
11
 *                                                                       *
12
 * This program is free software: you can redistribute it and/or modify  *
13
 * it under the terms of the GNU General Public License as published by  *
14
 * the Free Software Foundation, either version 3 of the License, or     *
15
 * (at your option) any later version.                                   *
16
 *                                                                       *
17
 * This program is distributed in the hope that it will be useful,       *
18
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
20
 * GNU General Public License for more details.                          *
21
 *                                                                       *
22
 * You should have received a copy of the GNU General Public License     *
23
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
24
\*************************************************************************/
25
#pragma once
26

27
#include "barcode_table.hpp"
28
#include "sequence.hpp"     // for dna_sequence
29
#include <cstddef>          // for size_t
30
#include <initializer_list> // for initializer_list
31
#include <string>           // for string
32
#include <string_view>      // for string_view
33
#include <vector>           // for vector
34

35
namespace adapterremoval {
36

37
using string_view_pair = std::pair<std::string_view, std::string_view>;
38

39
/** Contains SAM/BAM read-group information */
40
class read_group
54✔
41
{
42
public:
43
  read_group();
44

45
  /**
46
   * Parses a read-group string in the form "ID:1\tSM:sample" (optionally
47
   * including a leading "@RG\t"). Throws std::invalid_argument if the value
48
   * is invalid.
49
   */
50
  explicit read_group(std::string_view value);
51

52
  /** Returns the read-group ID for use in per-read 'RG' tags */
53
  [[nodiscard]] std::string_view id() const { return m_id; }
48✔
54

55
  /** Returns the full @RG header, not including a trailing new-line */
56
  [[nodiscard]] std::string_view header() const { return m_header; }
60✔
57

58
  /** Adds/replaces the read-group ID (ID) tag */
59
  void set_id(std::string_view id) { update_tag("ID", id); }
12✔
60

61
  /** Adds/replaces the sample (SM) tag */
62
  void set_sample(std::string_view name) { update_tag("SM", name); }
12✔
63

64
  /** Adds/replaces the barcode (BC) tag */
65
  void set_barcodes(std::string_view value) { update_tag("BC", value); }
×
66

67
private:
68
  /** Updates or adds the specified tag; sets `m_id` if key is `ID` */
69
  void update_tag(std::string_view key, std::string_view value);
70

71
  //! The full read_group header, including leading `@RG\t`
72
  std::string m_header{};
73
  //! Value mapping reads (via `RG:Z:${ID}`) to the @RG header
74
  std::string m_id{};
75
};
76

77
/**
78
 * Class for loading/handling adapter sequences.
79
 *
80
 * Adapter sequences are found in one of two orientations:
81
 *  - Read orientation, corresponding to the sequence in input fastq reads
82
 *  - Alignment orientation, corresponding to the orientation used during
83
 *    sequence alignment. For the mate 1 adapter, this is read orientation,
84
 *    but for the mate 2 adapter this is the reverse complement.
85
 */
86
class adapter_set
117✔
87
{
88
public:
89
  /** Initialize empty adapter list. */
90
  adapter_set() = default;
2✔
91

92
  /** Initializes with adapters in read orientation */
93
  adapter_set(std::initializer_list<string_view_pair> args);
94

95
  /** Adds a pair of adapters to the set in read orientation */
96
  void add(dna_sequence adapter1, dna_sequence adapter2);
97

98
  /** Adds a pair of adapters to the set in read orientation */
99
  void add(std::string adapter1, std::string adapter2);
100

101
  /** Generate new adapter set with these barcodes (in read orientation) */
102
  [[nodiscard]] adapter_set add_barcodes(const dna_sequence& barcode1,
103
                                         const dna_sequence& barcode2) const;
104

105
  /**
106
   * Loads adapters in read orientation from a TSV file, throwing on failure.
107
   * Two adapter sequences are expected if 'paired_end_mode' is set.
108
   */
109
  void load(const std::string& filename, bool paired_end_mode);
110

111
  /** Returns the number of adapters/adapter pairs added/loaded */
112
  [[nodiscard]] size_t size() const { return m_adapters.size(); }
×
113

114
  /** Iterator over adapter sequences in alignment orientation */
115
  [[nodiscard]] auto begin() const { return m_adapters.begin(); }
240✔
116

117
  /** Terminal iterator over adapter sequences in alignment orientation */
118
  [[nodiscard]] auto end() const { return m_adapters.end(); }
240✔
119

120
  [[nodiscard]] const auto& at(size_t n) const { return m_adapters.at(n); }
121

122
  /** Returns the adapters in read orientation */
123
  [[nodiscard]] sequence_pair_vec to_read_orientation() const;
124

125
private:
126
  //! Adapter sequences in alignment orientation
127
  sequence_pair_vec m_adapters{};
128
};
129

130
/** Represents sequences used for identifying/processing a sample */
131
struct sample_sequences
132
{
133
  sample_sequences() = default;
6✔
134

135
  sample_sequences(dna_sequence barcode1, dna_sequence barcode2)
×
136
    : barcode_1(std::move(barcode1))
×
137
    , barcode_2(std::move(barcode2))
×
138
  {
139
  }
140

141
  //! Read-group for this sample/barcode combination
142
  read_group info{};
143
  //! Barcode expected to be found in mate 1 reads, if any (read orientation)
144
  dna_sequence barcode_1{};
145
  //! Barcode expected to be found in mate 2 reads, if any (read orientation)
146
  dna_sequence barcode_2{};
147
  //! Adapter set with the above barcodes added
148
  adapter_set adapters{};
149
};
150

151
/** Represents a demultiplexing sample with one or more barcodes */
152
class sample
×
153
{
154
public:
155
  sample() { add(dna_sequence{}, dna_sequence{}); }
×
156

157
  explicit sample(std::string name,
×
158
                  dna_sequence barcode1,
159
                  dna_sequence barcode2)
160
    : m_name(std::move(name))
×
161
  {
162
    add(std::move(barcode1), std::move(barcode2));
×
163
  };
164

165
  explicit sample(std::string name, std::string barcode1, std::string barcode2)
166
    : sample(name, dna_sequence{ barcode1 }, dna_sequence{ barcode2 }){};
167

168
  /** Adds a pair of barcodes in read orientation */
169
  void add(dna_sequence barcode1, dna_sequence barcode2);
170

171
  /** Adds barcodes in read orientation */
172
  void add(std::string barcode1, std::string barcode2);
173

174
  /** Assigns adapter sequences for each pair of barcodes */
175
  void set_adapters(const adapter_set& adapters);
176

177
  /** Assigns read groups for each pair of barcodes */
178
  void set_read_group(const read_group& info);
179

180
  /** Returns the unique name of this sample */
181
  [[nodiscard]] const auto& name() const { return m_name; }
×
182

183
  /** Returns the number of barcode sequences loaded */
184
  [[nodiscard]] size_t size() const { return m_barcodes.size(); }
×
185

186
  /** Iterator over the barcodes / pairs of barcodes of this sample */
187
  [[nodiscard]] auto begin() const { return m_barcodes.begin(); }
×
188

189
  /** Terminal iterator over the barcodes / pairs of barcodes of this sample */
190
  [[nodiscard]] auto end() const { return m_barcodes.end(); }
×
191

192
  /** Returns the nth barcode / pair of barcodes */
193
  [[nodiscard]] const auto& at(size_t n) const { return m_barcodes.at(n); }
×
194

195
private:
196
  //! Unique name associated with this sample
197
  std::string m_name{};
198
  //! Barcodes identifying this sample
199
  std::vector<sample_sequences> m_barcodes{};
200
};
201

202
/** Configuration for loading of barcode tables */
203
class barcode_config
204
{
205
public:
206
  barcode_config() = default;
×
207

208
  /**
209
   * If PE mode is enabled, barcode 1 and 2 together must be unique, otherwise
210
   * barcode 1 sequences alone must be unique to allow unambiguous
211
   * identification of samples
212
   */
213
  auto& paired_end_mode(bool value)
×
214
  {
215
    m_paired_end_mode = value;
×
216
    return *this;
×
217
  }
218

219
  /** Specifies if barcodes are expected in one or both orientations */
220
  auto& unidirectional_barcodes(bool value)
×
221
  {
222
    m_unidirectional_barcodes = value;
×
223
    return *this;
×
224
  }
225

226
  /** Enable or disable support for multiple barcodes for the same sample */
227
  auto& allow_multiple_barcodes(bool value)
×
228
  {
229
    m_allow_multiple_barcodes = value;
×
230
    return *this;
×
231
  }
232

233
private:
234
  friend class sample_set;
235

236
  //! Whether running in paired or single end mode; is used to determine whether
237
  //! or not samples can be uniquely identified from the barcodes provided
238
  bool m_paired_end_mode = false;
239
  //! Indicates if multiple barcodes/barcode pairs are allowed per sample
240
  bool m_allow_multiple_barcodes = false;
241
  //! Indicates if barcode pairs can be annealed in both orientations
242
  bool m_unidirectional_barcodes = true;
243
};
244

245
/**
246
 * Class for handling samples for demultiplexing. The class further checks for
247
 * the correctness of these sequences, and detects duplicate barcode sequences /
248
 * pairs of sequences.
249
 */
250
class sample_set
251
{
252
public:
253
  /** Creates barcode set with single unnamed sample with empty barcodes */
254
  sample_set();
255

256
  /** Sets adapter sequences for all samples */
257
  void set_adapters(adapter_set adapters);
258

259
  /** Sets read group for samples using information parsed using `read_group` */
260
  void set_read_group(std::string_view value);
261

262
  /** Clears existing samples and loads barcodes from a TSV file */
263
  void load(const std::string& filename, const barcode_config& config);
264

265
  /** Convenience function to get sequences for sample / barcode pair */
266
  [[nodiscard]] const auto& get_sequences(const size_t sample,
267
                                          const size_t barcodes) const
268
  {
269
    return m_samples.at(sample).at(barcodes);
270
  }
271

272
  /** Returns the number of (demultiplexing) samples */
273
  [[nodiscard]] size_t size() const { return m_samples.size(); }
×
274

275
  /** Iterator over (demultiplexing) samples */
276
  [[nodiscard]] auto begin() const { return m_samples.begin(); }
×
277

278
  /** Terminal iterator over (demultiplexing) samples */
279
  [[nodiscard]] auto end() const { return m_samples.end(); }
×
280

281
  /** Returns the nth (demultiplexing) sample */
282
  [[nodiscard]] const auto& at(size_t n) const { return m_samples.at(n); }
×
283

284
  /** Returns the original, user-supplied adapter sequences */
285
  [[nodiscard]] const adapter_set& adapters() const { return m_adapters; }
×
286

287
  /** Returns special sample representing unidentified reads */
288
  [[nodiscard]] const auto& unidentified() const { return m_unidentified; }
×
289

290
private:
291
  /** Adds the reverse complement of barcodes for all samples, if missing */
292
  void add_reversed_barcodes(const barcode_config& config);
293

294
  //! Demultiplexing samples. Names and barcode pairs are both unique
295
  std::vector<sample> m_samples{};
296
  //! Special sample representing unidentified reads
297
  sample m_unidentified{};
298
  //! User-supplied read group used to generate per-sample read-groups
299
  read_group m_read_group{};
300
  //! User-supplied adapter sequences used to generate per-barcode adapters
301
  adapter_set m_adapters{};
302
};
303

304
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc