• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #117

25 May 2025 03:01PM UTC coverage: 66.932% (-0.07%) from 67.006%
#117

push

travis-ci

web-flow
iwyu and reduce build-time inter-dependencies (#144)

26 of 145 new or added lines in 20 files covered. (17.93%)

89 existing lines in 5 files now uncovered.

9738 of 14549 relevant lines covered (66.93%)

3041.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/demultiplexing.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2021 Stinus Lindgreen <stinus@binf.ku.dk>
3
// SPDX-FileCopyrightText: 2014 Mikkel Schubert <mikkelsch@gmail.com>
4
#include "demultiplexing.hpp" // declarations
5
#include "barcode_table.hpp"  // for barcode_table
6
#include "commontypes.hpp"    // for read_type, read_file
7
#include "counts.hpp"         // for counts
8
#include "debug.hpp"          // for AR_REQUIRE, AR_REQUIRE_SINGLE_THREAD
9
#include "fastq.hpp"          // for fastq
10
#include "output.hpp"         // for output_files
11
#include "sequence_sets.hpp"  // for adapter_set
12
#include "statistics.hpp"     // for demux_statistics
13
#include "userconfig.hpp"     // for userconfig, ar_command, ar_command...
14
#include <cstddef>            // for size_t
15
#include <memory>             // for unique_ptr
16
#include <string>             // for basic_string
17
#include <utility>            // for move
18

19
namespace adapterremoval {
20

21
///////////////////////////////////////////////////////////////////////////////
22
// Implementations for `demultiplex_reads`
23

24
/**
 * Shared setup for the single-end and paired-end demultiplexing steps.
 *
 * Builds the barcode table from the configured samples and mismatch limits,
 * sizes the per-sample statistics to match the sample set, and records a
 * (sample, barcode) pair for every barcode of every sample.
 *
 * @param config User configuration; `config.samples` is dereferenced.
 * @param steps  Post-demultiplexing steps; must list one entry per sample.
 * @param stats  Statistics sink; must be non-null with no samples recorded.
 */
demultiplex_reads::demultiplex_reads(const userconfig& config,
                                     const post_demux_steps& steps,
                                     demux_stats_ptr stats)
  : analytical_step(processing_order::ordered, "demultiplex_reads")
  , m_samples(*config.samples)
  , m_barcode_table(m_samples,
                    config.barcode_mm,
                    config.barcode_mm_r1,
                    config.barcode_mm_r2)
  , m_config(config)
  , m_steps(steps)
  , m_cache(steps)
  , m_statistics(std::move(stats))
{
  AR_REQUIRE(m_samples.size());
  AR_REQUIRE(m_samples.size() == m_steps.samples.size());
  AR_REQUIRE(m_statistics);
  AR_REQUIRE(m_statistics->samples.empty());

  const size_t sample_count = m_samples.size();
  m_statistics->samples.resize(sample_count);

  // Map global barcode offsets to sample and relative barcode offsets
  for (size_t sample_idx = 0; sample_idx < sample_count; ++sample_idx) {
    const size_t barcode_count = m_samples.at(sample_idx).size();

    auto& sample_counts = m_statistics->samples.at(sample_idx);
    sample_counts.resize_up_to(barcode_count);

    for (size_t barcode_idx = 0; barcode_idx < barcode_count; ++barcode_idx) {
      m_barcodes.emplace_back(sample_idx, barcode_idx);
    }
  }
}
55

56
///////////////////////////////////////////////////////////////////////////////
57

58
/**
 * Single-end demultiplexing step; all setup is delegated to the
 * `demultiplex_reads` base-class constructor.
 */
demultiplex_se_reads::demultiplex_se_reads(const userconfig& config,
                                           const post_demux_steps& steps,
                                           demux_stats_ptr stats)
  : demultiplex_reads(config, steps, std::move(stats))
{
  // Nothing beyond the base-class initialization is required
}
64

65
/**
 * Demultiplexes one chunk of single-end reads.
 *
 * Each mate 1 read is looked up in the barcode table. Reads with a valid
 * sample and barcode are truncated using the table's mate 1 length, counted
 * for that sample/barcode, and cached for that sample. All other reads are
 * counted as unidentified or ambiguous and cached as unidentified.
 *
 * @param chunk Non-null chunk of reads; its reads are moved into the cache.
 * @return Whatever the cache flushes for this chunk's eof/mate-separator.
 */
chunk_vec
demultiplex_se_reads::process(chunk_ptr chunk)
{
  AR_REQUIRE(chunk);
  AR_REQUIRE_SINGLE_THREAD(m_lock);

  for (auto& read : chunk->reads_1) {
    const auto [sample, barcode] = m_barcode_table.identify(read);

    if (sample >= 0 && barcode >= 0) {
      read.truncate(m_barcode_table.length_1());

      m_statistics->samples.at(sample).inc(barcode);
      m_cache.add_read_1(std::move(read), sample, barcode);
      continue;
    }

    // TODO: We should keep reads even if we cannot identify the exact barcode
    if (sample == barcode_key::unidentified) {
      m_statistics->unidentified += 1;
    } else if (sample == barcode_key::ambiguous) {
      m_statistics->ambiguous += 1;
    } else {
      AR_FAIL("invalid barcode match sample");
    }

    m_cache.add_unidentified_1(std::move(read));
  }

  return m_cache.flush(chunk->eof, chunk->mate_separator);
}
96

97
///////////////////////////////////////////////////////////////////////////////
98

99
/**
 * Paired-end demultiplexing step; all setup is delegated to the
 * `demultiplex_reads` base-class constructor.
 */
demultiplex_pe_reads::demultiplex_pe_reads(const userconfig& config,
                                           const post_demux_steps& steps,
                                           demux_stats_ptr stats)
  : demultiplex_reads(config, steps, std::move(stats))
{
  // Nothing beyond the base-class initialization is required
}
105

106
/**
 * Demultiplexes one chunk of paired-end reads.
 *
 * Mate 1 and mate 2 reads are walked in lock-step and each pair is looked up
 * in the barcode table. Identified pairs are truncated using the table's
 * mate 1 / mate 2 lengths, cached for their sample, and counted as two reads.
 * All other pairs are counted (as two reads) as unidentified or ambiguous and
 * cached as unidentified.
 *
 * @param chunk Non-null chunk holding equally many mate 1 and mate 2 reads.
 * @return Whatever the cache flushes for this chunk's eof/mate-separator.
 */
chunk_vec
demultiplex_pe_reads::process(chunk_ptr chunk)
{
  AR_REQUIRE(chunk);
  AR_REQUIRE_SINGLE_THREAD(m_lock);
  AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());

  auto cursor_1 = chunk->reads_1.begin();
  auto cursor_2 = chunk->reads_2.begin();
  const auto last_1 = chunk->reads_1.end();

  for (; cursor_1 != last_1; ++cursor_1, ++cursor_2) {
    auto& mate_1 = *cursor_1;
    auto& mate_2 = *cursor_2;

    const auto [sample, barcode] = m_barcode_table.identify(mate_1, mate_2);

    if (sample >= 0 && barcode >= 0) {
      mate_1.truncate(m_barcode_table.length_1());
      m_cache.add_read_1(std::move(mate_1), sample, barcode);

      mate_2.truncate(m_barcode_table.length_2());
      m_cache.add_read_2(std::move(mate_2), sample);

      m_statistics->samples.at(sample).inc(barcode, 2);
      continue;
    }

    // TODO: We should keep reads even if we cannot identify the exact barcode
    if (sample == barcode_key::unidentified) {
      m_statistics->unidentified += 2;
    } else if (sample == barcode_key::ambiguous) {
      m_statistics->ambiguous += 2;
    } else {
      AR_FAIL("invalid barcode match sample");
    }

    m_cache.add_unidentified_1(std::move(mate_1));
    m_cache.add_unidentified_2(std::move(mate_2));
  }

  return m_cache.flush(chunk->eof, chunk->mate_separator);
}
146

147
///////////////////////////////////////////////////////////////////////////////
148

149
/**
 * Step that writes the reads assigned to a single sample.
 *
 * @param config User configuration; `config.samples` is dereferenced.
 * @param output Output files for the sample.
 * @param sample Index of the sample in `config.samples`.
 * @param sink   Statistics object that `finalize` merges the collected
 *               statistics into.
 */
process_demultiplexed::process_demultiplexed(const userconfig& config,
                                             const sample_output_files& output,
                                             const size_t sample,
                                             trim_stats_ptr sink)
  : analytical_step(processing_order::unordered, "process_demultiplexed")
  , m_config(config)
  , m_output(output)
  , m_samples(*config.samples)
  , m_sample(sample)
  , m_stats_sink(std::move(sink))
{
  // One statistics object per (potential) worker thread
  const auto& thread_count = m_config.max_threads;
  const auto& sample_rate = m_config.report_sample_rate;

  m_stats.emplace_back_n(thread_count, sample_rate);
}
162

163
/**
 * Processes a chunk of reads that were assigned to this step's sample.
 *
 * Every read gets the configured name prefix, is fed to the statistics
 * object acquired for this call, and is added to the output together with
 * its barcode. Headers are written if this is the first chunk.
 *
 * @param chunk Non-null chunk with one barcode per mate 1 read, and (in
 *              paired-end mode) equally many mate 1 and mate 2 reads.
 * @return The finalized output chunks.
 */
chunk_vec
process_demultiplexed::process(chunk_ptr chunk)
{
  AR_REQUIRE(chunk);

  processed_reads output{ m_output };
  output.set_sample(m_samples.at(m_sample));
  output.set_mate_separator(chunk->mate_separator);
  output.set_demultiplexing_only(true);

  if (chunk->first) {
    output.write_headers(m_config.args);
  }

  auto stats = m_stats.acquire();

  AR_REQUIRE(chunk->reads_1.size() == chunk->barcodes.size());
  auto barcode = chunk->barcodes.begin();

  if (!m_config.paired_ended_mode) {
    for (auto& read : chunk->reads_1) {
      read.add_prefix_to_name(m_config.prefix_read_1);

      stats->read_1->process(read);
      output.add(read, read_type::se, *barcode);
      ++barcode;
    }
  } else {
    AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());

    auto cursor_1 = chunk->reads_1.begin();
    auto cursor_2 = chunk->reads_2.begin();
    const auto last_1 = chunk->reads_1.end();

    // Both mates of a pair share the same barcode entry
    for (; cursor_1 != last_1; ++cursor_1, ++cursor_2, ++barcode) {
      auto& mate_1 = *cursor_1;
      mate_1.add_prefix_to_name(m_config.prefix_read_1);
      stats->read_1->process(mate_1);
      output.add(mate_1, read_type::pe_1, *barcode);

      auto& mate_2 = *cursor_2;
      mate_2.add_prefix_to_name(m_config.prefix_read_2);
      stats->read_2->process(mate_2);
      output.add(mate_2, read_type::pe_2, *barcode);
    }
  }

  m_stats.release(stats);

  return output.finalize(chunk->eof);
}
208

209
void
210
process_demultiplexed::finalize()
×
211
{
212
  m_stats.merge_into(*m_stats_sink);
×
213
}
214

215
///////////////////////////////////////////////////////////////////////////////
216

217
/**
 * Step that writes reads that could not be assigned to a sample.
 *
 * @param config User configuration.
 * @param output Output files; only the `unidentified_*` members are used.
 * @param stats  Demultiplexing statistics that `finalize` merges the
 *               collected read statistics into.
 */
processes_unidentified::processes_unidentified(const userconfig& config,
                                               const output_files& output,
                                               demux_stats_ptr stats)
  : analytical_step(processing_order::unordered, "processes_unidentified")
  , m_config(config)
  , m_statistics(std::move(stats))
{
  m_output.set_file(read_file::mate_1, output.unidentified_1);
  m_output.set_file(read_file::mate_2, output.unidentified_2);

  const auto& step_1 = output.unidentified_1_step;
  const auto& step_2 = output.unidentified_2_step;

  if (step_1 != output_files::disabled) {
    m_output.set_step(read_file::mate_1, step_1);
  }

  // The mate 2 step is only registered when it is enabled and differs from
  // the mate 1 step
  if (step_2 != output_files::disabled && step_1 != step_2) {
    m_output.set_step(read_file::mate_2, step_2);
  }

  m_stats_1.emplace_back_n(m_config.max_threads, config.report_sample_rate);
  m_stats_2.emplace_back_n(m_config.max_threads, config.report_sample_rate);
}
239

240
/**
 * Processes a chunk of reads that could not be assigned to a sample.
 *
 * Every read gets the configured name prefix, is fed to the statistics
 * object for its mate, and is added to the output for the "unidentified"
 * sample. Headers are written if this is the first chunk.
 *
 * @param chunk Non-null chunk; in paired-end mode it must hold equally many
 *              mate 1 and mate 2 reads.
 * @return The finalized output chunks.
 */
chunk_vec
processes_unidentified::process(chunk_ptr chunk)
{
  AR_REQUIRE(chunk);

  processed_reads output{ m_output };
  output.set_sample(m_config.samples->unidentified());
  output.set_mate_separator(chunk->mate_separator);

  if (chunk->first) {
    output.write_headers(m_config.args);
  }

  // Both statistics objects are acquired regardless of the sequencing mode
  auto stats_1 = m_stats_1.acquire();
  auto stats_2 = m_stats_2.acquire();

  if (!m_config.paired_ended_mode) {
    for (auto& read : chunk->reads_1) {
      read.add_prefix_to_name(m_config.prefix_read_1);

      stats_1->process(read);
      output.add(read, read_type::se);
    }
  } else {
    AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());

    auto cursor_1 = chunk->reads_1.begin();
    auto cursor_2 = chunk->reads_2.begin();
    const auto last_1 = chunk->reads_1.end();

    for (; cursor_1 != last_1; ++cursor_1, ++cursor_2) {
      auto& mate_1 = *cursor_1;
      mate_1.add_prefix_to_name(m_config.prefix_read_1);
      stats_1->process(mate_1);
      output.add(mate_1, read_type::pe_1);

      auto& mate_2 = *cursor_2;
      mate_2.add_prefix_to_name(m_config.prefix_read_2);
      stats_2->process(mate_2);
      output.add(mate_2, read_type::pe_2);
    }
  }

  m_stats_1.release(stats_1);
  m_stats_2.release(stats_2);

  return output.finalize(chunk->eof);
}
284

285
void
286
processes_unidentified::finalize()
×
287
{
288
  m_stats_1.merge_into(*m_statistics->unidentified_stats_1);
×
289
  m_stats_2.merge_into(*m_statistics->unidentified_stats_2);
×
290
}
291

292
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc