• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #95

13 Apr 2025 06:28PM UTC coverage: 30.749%. Remained the same
#95

push

travis-ci

web-flow
rename enums read_type and fastq_flags (#121)

To clarify the usage of two commonly used enums, `read_type` was renamed
to `read_file`, since it is used to specify input and output file
types, while `fastq_flags` was renamed to `read_type`, since it is used
to differentiate between different pre/post-processed read types.

0 of 66 new or added lines in 6 files covered. (0.0%)

4 existing lines in 1 file now uncovered.

3157 of 10267 relevant lines covered (30.75%)

3943.28 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/demultiplexing.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2021 Stinus Lindgreen <stinus@binf.ku.dk>
3
// SPDX-FileCopyrightText: 2014 Mikkel Schubert <mikkelsch@gmail.com>
4
#include "demultiplexing.hpp"
5
#include "barcode_table.hpp" // for barcode_table
6
#include "debug.hpp"         // for AR_REQUIRE, AR_REQUIRE_SINGLE_THREAD
7
#include "fastq_io.hpp"      // for chunk_ptr, fastq...
8
#include "output.hpp"        // for output_files
9
#include "sequence_sets.hpp" // for adapter_set
10
#include "serializer.hpp"    // for read_type
11
#include "userconfig.hpp"    // for userconfig, ar_command, ar_command::demul...
12
#include <cstddef>           // for size_t
13
#include <memory>            // for make_unique, unique_ptr
14
#include <utility>           // for move
15

16
namespace adapterremoval {
17

18
///////////////////////////////////////////////////////////////////////////////
19
// Implementations for `demultiplex_reads`
20

21
/// Shared constructor for the demultiplexing steps. Registers an ordered
/// analytical step named "demultiplex_reads", builds the barcode table from
/// `config.samples` and the three barcode mismatch settings, and sizes the
/// per-sample statistics (which must be empty on entry) to match the samples.
///
/// @param config User configuration; supplies the samples and mismatch limits.
/// @param steps  Post-demultiplexing steps; also used to construct the cache.
/// @param stats  Demultiplexing statistics to update; must be non-null.
demultiplex_reads::demultiplex_reads(const userconfig& config,
×
22
                                     const post_demux_steps& steps,
23
                                     demux_stats_ptr stats)
×
24
  : analytical_step(processing_order::ordered, "demultiplex_reads")
25
  , m_samples(config.samples)
×
26
  , m_barcode_table(m_samples,
×
27
                    config.barcode_mm,
×
28
                    config.barcode_mm_r1,
×
29
                    config.barcode_mm_r2)
×
30
  , m_config(config)
×
31
  , m_steps(steps)
×
32
  , m_cache(steps)
×
33
  , m_statistics(std::move(stats))
×
34
{
35
  // At least one sample is required, and `m_steps.samples` must contain
  // exactly one entry per sample
  AR_REQUIRE(m_samples.size());
×
36
  AR_REQUIRE(m_samples.size() == m_steps.samples.size());
×
37
  AR_REQUIRE(m_statistics);
×
38

39
  // The statistics object must not already contain per-sample entries
  AR_REQUIRE(m_statistics->samples.empty());
×
40
  m_statistics->samples.resize(m_samples.size());
×
41

42
  // Map global barcode offsets to sample and relative barcode offsets
43
  for (size_t i = 0; i < m_samples.size(); ++i) {
×
44
    const size_t barcodes = m_samples.at(i).size();
×
45

46
    m_statistics->samples.at(i).resize_up_to(barcodes);
×
47
    for (size_t j = 0; j < barcodes; ++j) {
×
48
      m_barcodes.emplace_back(i, j);
×
49
    }
50
  }
51
}
52

53
///////////////////////////////////////////////////////////////////////////////
54

55
/// Constructs a `demultiplex_se_reads` step by forwarding all arguments to the
/// `demultiplex_reads` base constructor; no additional state is set up here.
demultiplex_se_reads::demultiplex_se_reads(const userconfig& config,
×
56
                                           const post_demux_steps& steps,
57
                                           demux_stats_ptr stats)
×
58
  : demultiplex_reads(config, steps, std::move(stats))
×
59
{
60
}
61

62
/// Assigns every read in `chunk->reads_1` to a sample/barcode by querying the
/// barcode table. Reads for which a negative sample or barcode index is
/// returned are counted as unidentified or ambiguous and handed to the cache
/// as unidentified reads; all other reads are truncated, counted for their
/// sample/barcode, and handed to the cache for that sample.
///
/// @param chunk Non-null chunk of reads; the reads are moved out of the chunk.
/// @return The result of `m_cache.flush` for this chunk.
chunk_vec
63
demultiplex_se_reads::process(chunk_ptr chunk)
×
64
{
65
  AR_REQUIRE(chunk);
×
66
  AR_REQUIRE_SINGLE_THREAD(m_lock);
×
67
  for (auto& read : chunk->reads_1) {
×
68
    const auto [sample, barcode] = m_barcode_table.identify(read);
×
69
    // TODO: We should keep reads even if we cannot identify the exact barcode
70
    if (sample < 0 || barcode < 0) {
×
71
      // Only `unidentified` and `ambiguous` are accepted as reasons for a
      // failed match; any other value of `sample` triggers AR_FAIL
      switch (sample) {
×
72
        case barcode_key::unidentified:
×
73
          m_statistics->unidentified += 1;
×
74
          break;
×
75
        case barcode_key::ambiguous:
×
76
          m_statistics->ambiguous += 1;
×
77
          break;
×
78
        default:
×
79
          AR_FAIL("invalid barcode match sample");
×
80
      }
81

82
      m_cache.add_unidentified_1(std::move(read));
×
83
    } else {
84
      // NOTE(review): presumably this strips the mate 1 barcode from the
      // read -- confirm against the definition of `truncate`
      read.truncate(m_barcode_table.length_1());
×
85

86
      m_statistics->samples.at(sample).inc(barcode);
×
87
      m_cache.add_read_1(std::move(read), sample, barcode);
×
88
    }
89
  }
90

91
  return m_cache.flush(chunk->eof, chunk->mate_separator);
×
92
}
93

94
///////////////////////////////////////////////////////////////////////////////
95

96
/// Constructs a `demultiplex_pe_reads` step by forwarding all arguments to the
/// `demultiplex_reads` base constructor; no additional state is set up here.
demultiplex_pe_reads::demultiplex_pe_reads(const userconfig& config,
×
97
                                           const post_demux_steps& steps,
98
                                           demux_stats_ptr stats)
×
99
  : demultiplex_reads(config, steps, std::move(stats))
×
100
{
101
}
102

103
/// Walks `chunk->reads_1` and `chunk->reads_2` in lockstep and assigns each
/// pair to a sample/barcode by querying the barcode table with both mates.
/// Pairs for which a negative sample or barcode index is returned are counted
/// as unidentified or ambiguous and handed to the cache as unidentified reads;
/// all other pairs are truncated, counted, and handed to the cache for their
/// sample. Statistics are incremented by 2 per pair, i.e. once per read.
///
/// @param chunk Non-null chunk with equally many mate 1 and mate 2 reads; the
///              reads are moved out of the chunk.
/// @return The result of `m_cache.flush` for this chunk.
chunk_vec
104
demultiplex_pe_reads::process(chunk_ptr chunk)
×
105
{
106
  AR_REQUIRE(chunk);
×
107
  AR_REQUIRE_SINGLE_THREAD(m_lock);
×
108
  AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());
×
109

110
  auto it_1 = chunk->reads_1.begin();
×
111
  auto it_2 = chunk->reads_2.begin();
×
112
  // Only `it_1` is checked against the end; the size check above guarantees
  // that `it_2` stays in range
  for (; it_1 != chunk->reads_1.end(); ++it_1, ++it_2) {
×
113
    const auto [sample, barcode] = m_barcode_table.identify(*it_1, *it_2);
×
114

115
    // TODO: We should keep reads even if we cannot identify the exact barcode
116
    if (sample < 0 || barcode < 0) {
×
117
      // Only `unidentified` and `ambiguous` are accepted as reasons for a
      // failed match; any other value of `sample` triggers AR_FAIL
      switch (sample) {
×
118
        case barcode_key::unidentified:
×
119
          m_statistics->unidentified += 2;
×
120
          break;
×
121
        case barcode_key::ambiguous:
×
122
          m_statistics->ambiguous += 2;
×
123
          break;
×
124
        default:
×
125
          AR_FAIL("invalid barcode match sample");
×
126
      }
127

128
      m_cache.add_unidentified_1(std::move(*it_1));
×
129
      m_cache.add_unidentified_2(std::move(*it_2));
×
130
    } else {
131
      // The barcode index is passed along with mate 1 only; mate 2 is added
      // with just the sample index
      it_1->truncate(m_barcode_table.length_1());
×
132
      m_cache.add_read_1(std::move(*it_1), sample, barcode);
×
133

134
      it_2->truncate(m_barcode_table.length_2());
×
135
      m_cache.add_read_2(std::move(*it_2), sample);
×
136

137
      m_statistics->samples.at(sample).inc(barcode, 2);
×
138
    }
139
  }
140

141
  return m_cache.flush(chunk->eof, chunk->mate_separator);
×
142
}
143

144
///////////////////////////////////////////////////////////////////////////////
145

146
/// Constructs an unordered analytical step named "process_demultiplexed" that
/// handles the reads of a single sample.
///
/// @param config User configuration; also supplies the list of samples.
/// @param output Output files used when writing processed reads.
/// @param sample Index into `config.samples` of the sample handled here.
/// @param sink   Statistics object that `finalize` merges results into.
process_demultiplexed::process_demultiplexed(const userconfig& config,
×
147
                                             const sample_output_files& output,
148
                                             const size_t sample,
149
                                             trim_stats_ptr sink)
×
150
  : analytical_step(processing_order::unordered, "process_demultiplexed")
151
  , m_config(config)
×
152
  , m_output(output)
×
153
  , m_samples(config.samples)
×
154
  , m_sample(sample)
×
155
  , m_stats_sink(std::move(sink))
×
156
{
157
  // NOTE(review): presumably creates one statistics object per worker thread,
  // each using the configured report sample rate -- confirm `emplace_back_n`
  m_stats.emplace_back_n(m_config.max_threads, m_config.report_sample_rate);
×
158
}
159

160
/// Processes one chunk of reads belonging to this step's sample: each read is
/// given the configured name prefix, passed to the acquired statistics object,
/// and added to a `processed_reads` collection together with its barcode.
/// Headers are written only for the first chunk.
///
/// @param chunk Non-null chunk; `barcodes` must have one entry per mate 1
///              read, and in paired-end mode `reads_2` must match `reads_1`
///              in size.
/// @return The result of `processed_reads::finalize` for this chunk.
chunk_vec
161
process_demultiplexed::process(chunk_ptr chunk)
×
162
{
163
  AR_REQUIRE(chunk);
×
164
  processed_reads chunks{ m_output };
×
165
  chunks.set_sample(m_samples.at(m_sample));
×
166
  chunks.set_mate_separator(chunk->mate_separator);
×
167
  chunks.set_demultiplexing_only(true);
×
168

169
  if (chunk->first) {
×
170
    chunks.write_headers(m_config.args);
×
171
  }
172

173
  // Acquired here and handed back via `release` before returning
  auto stats = m_stats.acquire();
×
174

175
  AR_REQUIRE(chunk->reads_1.size() == chunk->barcodes.size());
×
176
  auto barcode = chunk->barcodes.begin();
×
177

178
  if (m_config.paired_ended_mode) {
×
179
    AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());
×
180

181
    auto it_1 = chunk->reads_1.begin();
×
182
    auto it_2 = chunk->reads_2.begin();
×
183
    // Both mates of a pair are added with the same barcode value
    for (; it_1 != chunk->reads_1.end(); ++it_1, ++it_2, ++barcode) {
×
184
      it_1->add_prefix_to_name(m_config.prefix_read_1);
×
185
      stats->read_1->process(*it_1);
×
NEW
186
      chunks.add(*it_1, read_file::mate_1, read_type::pe_1, *barcode);
×
187

188
      it_2->add_prefix_to_name(m_config.prefix_read_2);
×
189
      stats->read_2->process(*it_2);
×
NEW
190
      chunks.add(*it_2, read_file::mate_2, read_type::pe_2, *barcode);
×
191
    }
192
  } else {
193
    for (auto& read : chunk->reads_1) {
×
194
      read.add_prefix_to_name(m_config.prefix_read_1);
×
195

196
      stats->read_1->process(read);
×
NEW
197
      chunks.add(read, read_file::mate_1, read_type::se, *barcode++);
×
198
    }
199
  }
200

201
  m_stats.release(stats);
×
202

203
  return chunks.finalize(chunk->eof);
×
204
}
205

206
/// Merges the statistics collected by this step into the sink that was passed
/// to the constructor.
void
207
process_demultiplexed::finalize()
×
208
{
209
  m_stats.merge_into(*m_stats_sink);
×
210
}
211

212
///////////////////////////////////////////////////////////////////////////////
213

214
/// Constructs an unordered analytical step named "processes_unidentified" and
/// configures its output files and steps from the `unidentified_*` fields of
/// `output`.
///
/// @param config User configuration.
/// @param output Output files; only the `unidentified_*` members are read.
/// @param stats  Demultiplexing statistics that `finalize` merges results into.
processes_unidentified::processes_unidentified(const userconfig& config,
×
215
                                               const output_files& output,
216
                                               demux_stats_ptr stats)
×
217
  : analytical_step(processing_order::unordered, "processes_unidentified")
218
  , m_config(config)
×
219
  , m_statistics(std::move(stats))
×
220
{
NEW
221
  m_output.set_file(read_file::mate_1, output.unidentified_1);
×
NEW
222
  m_output.set_file(read_file::mate_2, output.unidentified_2);
×
223

224
  if (output.unidentified_1_step != output_files::disabled) {
×
NEW
225
    m_output.set_step(read_file::mate_1, output.unidentified_1_step);
×
226
  }
227

228
  // The mate 2 step is only registered when it differs from the mate 1 step
  // NOTE(review): presumably both mates share a step when written to the same
  // (e.g. interleaved) file -- confirm against `output_files`
  if (output.unidentified_1_step != output.unidentified_2_step &&
×
229
      output.unidentified_2_step != output_files::disabled) {
×
NEW
230
    m_output.set_step(read_file::mate_2, output.unidentified_2_step);
×
231
  }
232

233
  m_stats_1.emplace_back_n(m_config.max_threads, config.report_sample_rate);
×
234
  m_stats_2.emplace_back_n(m_config.max_threads, config.report_sample_rate);
×
235
}
236

237
/// Processes one chunk of unidentified reads: each read is given the
/// configured name prefix, passed to the acquired per-mate statistics object,
/// and added to a `processed_reads` collection with a barcode value of 0.
/// Headers are written only for the first chunk.
///
/// @param chunk Non-null chunk; in paired-end mode `reads_2` must match
///              `reads_1` in size.
/// @return The result of `processed_reads::finalize` for this chunk.
chunk_vec
238
processes_unidentified::process(chunk_ptr chunk)
×
239
{
240
  AR_REQUIRE(chunk);
×
241
  processed_reads chunks{ m_output };
×
242
  chunks.set_sample(m_config.samples.unidentified());
×
243
  chunks.set_mate_separator(chunk->mate_separator);
×
244

245
  if (chunk->first) {
×
246
    chunks.write_headers(m_config.args);
×
247
  }
248

249
  // Both statistics objects are acquired regardless of mode; `stats_2` is
  // only used in paired-end mode
  auto stats_1 = m_stats_1.acquire();
×
250
  auto stats_2 = m_stats_2.acquire();
×
251

252
  if (m_config.paired_ended_mode) {
×
253
    AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());
×
254

255
    auto it_1 = chunk->reads_1.begin();
×
256
    auto it_2 = chunk->reads_2.begin();
×
257

258
    for (; it_1 != chunk->reads_1.end(); ++it_1, ++it_2) {
×
259
      it_1->add_prefix_to_name(m_config.prefix_read_1);
×
260
      stats_1->process(*it_1);
×
NEW
261
      chunks.add(*it_1, read_file::mate_1, read_type::pe_1, 0);
×
262

263
      it_2->add_prefix_to_name(m_config.prefix_read_2);
×
264
      stats_2->process(*it_2);
×
NEW
265
      chunks.add(*it_2, read_file::mate_2, read_type::pe_2, 0);
×
266
    }
267
  } else {
268
    for (auto& read : chunk->reads_1) {
×
269
      read.add_prefix_to_name(m_config.prefix_read_1);
×
270

271
      stats_1->process(read);
×
NEW
272
      chunks.add(read, read_file::mate_1, read_type::se, 0);
×
273
    }
274
  }
275

276
  m_stats_1.release(stats_1);
×
277
  m_stats_2.release(stats_2);
×
278

279
  return chunks.finalize(chunk->eof);
×
280
}
281

282
/// Merges the per-mate statistics collected by this step into the
/// `unidentified_stats_1` and `unidentified_stats_2` members of the
/// demultiplexing statistics passed to the constructor.
void
283
processes_unidentified::finalize()
×
284
{
285
  m_stats_1.merge_into(*m_statistics->unidentified_stats_1);
×
286
  m_stats_2.merge_into(*m_statistics->unidentified_stats_2);
×
287
}
288

289
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc