• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #101

18 Apr 2025 12:19PM UTC coverage: 67.186% (+1.8%) from 65.404%
#101

push

travis-ci

web-flow
add meta-data class for read serialization (#127)

This simplifies passing of additional information (to be added) that
influences how reads are serialized.

The read_type enum is furthermore expanded, allowing the file_type
to be derived from the read_type enum by the meta data class.

Test cases were added for the serializers

472 of 531 new or added lines in 8 files covered. (88.89%)

2 existing lines in 1 file now uncovered.

9697 of 14433 relevant lines covered (67.19%)

3063.78 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/demultiplexing.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2021 Stinus Lindgreen <stinus@binf.ku.dk>
3
// SPDX-FileCopyrightText: 2014 Mikkel Schubert <mikkelsch@gmail.com>
4
#include "demultiplexing.hpp"
5
#include "barcode_table.hpp" // for barcode_table
6
#include "debug.hpp"         // for AR_REQUIRE, AR_REQUIRE_SINGLE_THREAD
7
#include "fastq_io.hpp"      // for chunk_ptr, fastq...
8
#include "output.hpp"        // for output_files
9
#include "sequence_sets.hpp" // for adapter_set
10
#include "userconfig.hpp"    // for userconfig, ar_command, ar_command::demul...
11
#include <cstddef>           // for size_t
12
#include <memory>            // for make_unique, unique_ptr
13
#include <utility>           // for move
14

15
namespace adapterremoval {
16

17
///////////////////////////////////////////////////////////////////////////////
18
// Implementations for `demultiplex_reads`
19

20
// Common constructor for the demultiplexing steps.
//
// Initializes the sample list and the barcode table (using the mismatch limits
// `barcode_mm`, `barcode_mm_r1`, and `barcode_mm_r2`) from `config`, stores
// `steps` (which is also used to set up the read cache), and takes ownership
// of `stats`.
//
// Requirements checked via AR_REQUIRE: at least one sample is configured, the
// number of samples matches `steps.samples`, and `stats` is non-null and does
// not yet contain any per-sample entries.
demultiplex_reads::demultiplex_reads(const userconfig& config,
                                     const post_demux_steps& steps,
                                     demux_stats_ptr stats)
  : analytical_step(processing_order::ordered, "demultiplex_reads")
  , m_samples(config.samples)
  , m_barcode_table(m_samples,
                    config.barcode_mm,
                    config.barcode_mm_r1,
                    config.barcode_mm_r2)
  , m_config(config)
  , m_steps(steps)
  , m_cache(steps)
  , m_statistics(std::move(stats))
{
  // Validate the configuration and the statistics object before touching it
  AR_REQUIRE(m_samples.size());
  AR_REQUIRE(m_samples.size() == m_steps.samples.size());
  AR_REQUIRE(m_statistics);
  AR_REQUIRE(m_statistics->samples.empty());

  // One statistics entry per sample
  m_statistics->samples.resize(m_samples.size());

  // Record a (sample, barcode) pair for every barcode of every sample, so that
  // a global barcode offset can be mapped to a sample and a relative offset
  for (size_t sample_idx = 0; sample_idx < m_samples.size(); ++sample_idx) {
    const size_t barcode_count = m_samples.at(sample_idx).size();
    m_statistics->samples.at(sample_idx).resize_up_to(barcode_count);

    for (size_t barcode_idx = 0; barcode_idx != barcode_count; ++barcode_idx) {
      m_barcodes.emplace_back(sample_idx, barcode_idx);
    }
  }
}
51

52
///////////////////////////////////////////////////////////////////////////////
53

54
// Constructs the demultiplexing step that processes `reads_1` only; all setup
// is delegated to the `demultiplex_reads` base class.
demultiplex_se_reads::demultiplex_se_reads(const userconfig& config,
                                           const post_demux_steps& steps,
                                           demux_stats_ptr stats)
  : demultiplex_reads(config, steps, std::move(stats))
{
  // Nothing to do beyond the base-class initialization
}
60

61
// Assigns each read in `chunk->reads_1` to a sample/barcode using the barcode
// table. Identified reads are truncated using `length_1()` of the barcode
// table, counted in the per-sample statistics, and added to the cache for
// their sample; all other reads are counted as unidentified or ambiguous and
// added to the cache as unidentified.
//
// Returns the result of flushing the cache for this chunk.
chunk_vec
demultiplex_se_reads::process(chunk_ptr chunk)
{
  AR_REQUIRE(chunk);
  AR_REQUIRE_SINGLE_THREAD(m_lock);

  for (auto& read : chunk->reads_1) {
    const auto [sample, barcode] = m_barcode_table.identify(read);

    if (sample >= 0 && barcode >= 0) {
      // A sample and barcode were identified for this read
      read.truncate(m_barcode_table.length_1());

      m_statistics->samples.at(sample).inc(barcode);
      m_cache.add_read_1(std::move(read), sample, barcode);
    } else {
      // TODO: We should keep reads even if we cannot identify the exact barcode
      switch (sample) {
        case barcode_key::unidentified:
          m_statistics->unidentified += 1;
          break;
        case barcode_key::ambiguous:
          m_statistics->ambiguous += 1;
          break;
        default:
          AR_FAIL("invalid barcode match sample");
      }

      m_cache.add_unidentified_1(std::move(read));
    }
  }

  return m_cache.flush(chunk->eof, chunk->mate_separator);
}
92

93
///////////////////////////////////////////////////////////////////////////////
94

95
// Constructs the demultiplexing step that processes `reads_1` and `reads_2`
// together; all setup is delegated to the `demultiplex_reads` base class.
demultiplex_pe_reads::demultiplex_pe_reads(const userconfig& config,
                                           const post_demux_steps& steps,
                                           demux_stats_ptr stats)
  : demultiplex_reads(config, steps, std::move(stats))
{
  // Nothing to do beyond the base-class initialization
}
101

102
// Assigns each pair of reads (`reads_1[i]`, `reads_2[i]`) to a sample/barcode
// using the barcode table; the two read lists must have the same length.
// Identified pairs are truncated using `length_1()` / `length_2()` of the
// barcode table and added to the cache for their sample; all other pairs are
// added to the cache as unidentified. Statistics are counted per read, i.e.
// each pair contributes 2 to the relevant counter.
//
// Returns the result of flushing the cache for this chunk.
chunk_vec
demultiplex_pe_reads::process(chunk_ptr chunk)
{
  AR_REQUIRE(chunk);
  AR_REQUIRE_SINGLE_THREAD(m_lock);
  AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());

  // Walk both mate lists in lockstep
  auto mate_1 = chunk->reads_1.begin();
  auto mate_2 = chunk->reads_2.begin();
  for (; mate_1 != chunk->reads_1.end(); ++mate_1, ++mate_2) {
    const auto [sample, barcode] = m_barcode_table.identify(*mate_1, *mate_2);

    if (sample >= 0 && barcode >= 0) {
      // A sample and barcode were identified for this pair
      mate_1->truncate(m_barcode_table.length_1());
      m_cache.add_read_1(std::move(*mate_1), sample, barcode);

      mate_2->truncate(m_barcode_table.length_2());
      m_cache.add_read_2(std::move(*mate_2), sample);

      m_statistics->samples.at(sample).inc(barcode, 2);
    } else {
      // TODO: We should keep reads even if we cannot identify the exact barcode
      switch (sample) {
        case barcode_key::unidentified:
          m_statistics->unidentified += 2;
          break;
        case barcode_key::ambiguous:
          m_statistics->ambiguous += 2;
          break;
        default:
          AR_FAIL("invalid barcode match sample");
      }

      m_cache.add_unidentified_1(std::move(*mate_1));
      m_cache.add_unidentified_2(std::move(*mate_2));
    }
  }

  return m_cache.flush(chunk->eof, chunk->mate_separator);
}
142

143
///////////////////////////////////////////////////////////////////////////////
144

145
// Constructs the step that writes the demultiplexed reads of a single sample.
//
// `output` is the set of output files used for the reads, `sample` is the
// index into `config.samples` of the sample being processed, and `sink` is
// the statistics object that the collected statistics are merged into by
// `finalize()`.
process_demultiplexed::process_demultiplexed(const userconfig& config,
                                             const sample_output_files& output,
                                             const size_t sample,
                                             trim_stats_ptr sink)
  : analytical_step(processing_order::unordered, "process_demultiplexed")
  , m_config(config)
  , m_output(output)
  , m_samples(config.samples)
  , m_sample(sample)
  , m_stats_sink(std::move(sink))
{
  // Pre-allocate `max_threads` statistics objects, each constructed with the
  // configured report sample rate
  m_stats.emplace_back_n(m_config.max_threads, m_config.report_sample_rate);
}
158

159
// Collects statistics for, and serializes, the reads of one demultiplexed
// chunk belonging to the sample handled by this step.
//
// Each read has the configured name prefix added, is passed to the statistics
// object acquired for this call, and is then added to the output together
// with the barcode at the same position in `chunk->barcodes`. Headers are
// written for the first chunk only. Returns the finalized output chunks.
chunk_vec
process_demultiplexed::process(chunk_ptr chunk)
{
  AR_REQUIRE(chunk);

  processed_reads out{ m_output };
  out.set_sample(m_samples.at(m_sample));
  out.set_mate_separator(chunk->mate_separator);
  out.set_demultiplexing_only(true);
  if (chunk->first) {
    out.write_headers(m_config.args);
  }

  auto stats = m_stats.acquire();

  // There must be exactly one barcode per (mate 1) read
  AR_REQUIRE(chunk->reads_1.size() == chunk->barcodes.size());
  auto barcode = chunk->barcodes.begin();

  if (!m_config.paired_ended_mode) {
    for (auto& read : chunk->reads_1) {
      read.add_prefix_to_name(m_config.prefix_read_1);
      stats->read_1->process(read);
      out.add(read, read_type::se, *barcode);
      ++barcode;
    }
  } else {
    AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());

    // Walk both mate lists and the barcode list in lockstep; both mates of a
    // pair share the same barcode
    auto mate_1 = chunk->reads_1.begin();
    auto mate_2 = chunk->reads_2.begin();
    for (; mate_1 != chunk->reads_1.end(); ++mate_1, ++mate_2, ++barcode) {
      mate_1->add_prefix_to_name(m_config.prefix_read_1);
      stats->read_1->process(*mate_1);
      out.add(*mate_1, read_type::pe_1, *barcode);

      mate_2->add_prefix_to_name(m_config.prefix_read_2);
      stats->read_2->process(*mate_2);
      out.add(*mate_2, read_type::pe_2, *barcode);
    }
  }

  m_stats.release(stats);

  return out.finalize(chunk->eof);
}
204

205
void
206
process_demultiplexed::finalize()
×
207
{
208
  m_stats.merge_into(*m_stats_sink);
×
209
}
210

211
///////////////////////////////////////////////////////////////////////////////
212

213
// Constructs the step that writes reads for which no sample was identified.
//
// The output files for mate 1 and mate 2 are taken from `output`. A step is
// only registered for a mate if it is not `output_files::disabled`, and for
// mate 2 only if its step differs from the step used for mate 1.
processes_unidentified::processes_unidentified(const userconfig& config,
                                               const output_files& output,
                                               demux_stats_ptr stats)
  : analytical_step(processing_order::unordered, "processes_unidentified")
  , m_config(config)
  , m_statistics(std::move(stats))
{
  m_output.set_file(read_file::mate_1, output.unidentified_1);
  m_output.set_file(read_file::mate_2, output.unidentified_2);

  const auto& step_1 = output.unidentified_1_step;
  const auto& step_2 = output.unidentified_2_step;

  if (step_1 != output_files::disabled) {
    m_output.set_step(read_file::mate_1, step_1);
  }

  // Mate 2 gets its own step only when it does not share the mate 1 step
  if (step_1 != step_2 && step_2 != output_files::disabled) {
    m_output.set_step(read_file::mate_2, step_2);
  }

  // Pre-allocate `max_threads` statistics objects per mate, each constructed
  // with the configured report sample rate
  m_stats_1.emplace_back_n(m_config.max_threads, config.report_sample_rate);
  m_stats_2.emplace_back_n(m_config.max_threads, config.report_sample_rate);
}
235

236
// Collects statistics for, and serializes, one chunk of unidentified reads.
//
// Each read has the configured name prefix added, is passed to the statistics
// object for its mate, and is then added to the output. Headers are written
// for the first chunk only. Returns the finalized output chunks.
chunk_vec
processes_unidentified::process(chunk_ptr chunk)
{
  AR_REQUIRE(chunk);

  processed_reads out{ m_output };
  out.set_sample(m_config.samples.unidentified());
  out.set_mate_separator(chunk->mate_separator);
  if (chunk->first) {
    out.write_headers(m_config.args);
  }

  // Both statistics objects are acquired regardless of mode
  auto mate_1_stats = m_stats_1.acquire();
  auto mate_2_stats = m_stats_2.acquire();

  if (!m_config.paired_ended_mode) {
    for (auto& read : chunk->reads_1) {
      read.add_prefix_to_name(m_config.prefix_read_1);
      mate_1_stats->process(read);
      out.add(read, read_type::se);
    }
  } else {
    AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());

    // Walk both mate lists in lockstep
    auto mate_1 = chunk->reads_1.begin();
    auto mate_2 = chunk->reads_2.begin();
    for (; mate_1 != chunk->reads_1.end(); ++mate_1, ++mate_2) {
      mate_1->add_prefix_to_name(m_config.prefix_read_1);
      mate_1_stats->process(*mate_1);
      out.add(*mate_1, read_type::pe_1);

      mate_2->add_prefix_to_name(m_config.prefix_read_2);
      mate_2_stats->process(*mate_2);
      out.add(*mate_2, read_type::pe_2);
    }
  }

  m_stats_1.release(mate_1_stats);
  m_stats_2.release(mate_2_stats);

  return out.finalize(chunk->eof);
}
280

281
void
282
processes_unidentified::finalize()
×
283
{
284
  m_stats_1.merge_into(*m_statistics->unidentified_stats_1);
×
285
  m_stats_2.merge_into(*m_statistics->unidentified_stats_2);
×
286
}
287

288
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc