• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #46

27 Nov 2024 03:10PM UTC coverage: 27.245% (+1.0%) from 26.244%
#46

push

travis-ci

MikkelSchubert
fix convenience executable make target

2609 of 9576 relevant lines covered (27.25%)

4268.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/demultiplexing.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2021 by Stinus Lindgreen - stinus@binf.ku.dk            *
5
 * Copyright (C) 2014 by Mikkel Schubert - mikkelsch@gmail.com           *
6
 *                                                                       *
7
 * This program is free software: you can redistribute it and/or modify  *
8
 * it under the terms of the GNU General Public License as published by  *
9
 * the Free Software Foundation, either version 3 of the License, or     *
10
 * (at your option) any later version.                                   *
11
 *                                                                       *
12
 * This program is distributed in the hope that it will be useful,       *
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
15
 * GNU General Public License for more details.                          *
16
 *                                                                       *
17
 * You should have received a copy of the GNU General Public License     *
18
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
19
\*************************************************************************/
20
#include "demultiplexing.hpp"
21
#include "barcode_table.hpp" // for barcode_table
22
#include "debug.hpp"         // for AR_REQUIRE, AR_REQUIRE_SINGLE_THREAD
23
#include "fastq_io.hpp"      // for chunk_ptr, fastq...
24
#include "output.hpp"        // for output_files
25
#include "sequence_sets.hpp" // for adapter_set
26
#include "serializer.hpp"    // for fastq_flags
27
#include "userconfig.hpp"    // for userconfig, ar_command, ar_command::demul...
28
#include <cstddef>           // for size_t
29
#include <memory>            // for make_unique, unique_ptr
30
#include <utility>           // for move
31

32
namespace adapterremoval {
33

34
///////////////////////////////////////////////////////////////////////////////
35
// Implementations for `demultiplex_reads`
36

37
demultiplex_reads::demultiplex_reads(const userconfig& config,
×
38
                                     const post_demux_steps& steps,
39
                                     demux_stats_ptr stats)
×
40
  : analytical_step(processing_order::ordered, "demultiplex_reads")
41
  , m_samples(config.samples)
×
42
  , m_barcode_table(m_samples,
×
43
                    config.barcode_mm,
×
44
                    config.barcode_mm_r1,
×
45
                    config.barcode_mm_r2)
×
46
  , m_config(config)
×
47
  , m_steps(steps)
×
48
  , m_cache(steps)
×
49
  , m_statistics(std::move(stats))
×
50
{
51
  AR_REQUIRE(m_samples.size());
×
52
  AR_REQUIRE(m_samples.size() == m_steps.samples.size());
×
53
  AR_REQUIRE(m_statistics);
×
54
  AR_REQUIRE(m_statistics->samples.size() == m_samples.size());
×
55

56
  // Map global barcode offsets to sample and relative barcode offsets
57
  for (size_t i = 0; i < m_samples.size(); ++i) {
×
58
    const size_t barcodes = m_samples.at(i).size();
×
59
    for (size_t j = 0; j < barcodes; ++j) {
×
60
      m_barcodes.emplace_back(i, j);
×
61
    }
62
  }
63
}
64

65
///////////////////////////////////////////////////////////////////////////////
66

67
demultiplex_se_reads::demultiplex_se_reads(const userconfig& config,
×
68
                                           const post_demux_steps& steps,
69
                                           demux_stats_ptr stats)
×
70
  : demultiplex_reads(config, steps, std::move(stats))
×
71
{
72
}
73

74
chunk_vec
75
demultiplex_se_reads::process(chunk_ptr chunk)
×
76
{
77
  AR_REQUIRE(chunk);
×
78
  AR_REQUIRE_SINGLE_THREAD(m_lock);
×
79
  for (auto& read : chunk->reads_1) {
×
80
    const auto [sample, barcode] = m_barcode_table.identify(read);
×
81
    // TODO: We should keep reads even if we cannot identify the exact barcode
82
    if (sample < 0 || barcode < 0) {
×
83
      switch (sample) {
×
84
        case barcode_key::unidentified:
×
85
          m_statistics->unidentified += 1;
×
86
          break;
×
87
        case barcode_key::ambiguous:
×
88
          m_statistics->ambiguous += 1;
×
89
          break;
×
90
        default:
×
91
          AR_FAIL("invalid barcode match sample");
×
92
      }
93

94
      m_cache.add_unidentified_1(std::move(read));
×
95
    } else {
96
      read.truncate(m_barcode_table.length_1());
×
97

98
      m_statistics->samples.at(sample) += 1;
×
99
      m_cache.add_read_1(std::move(read), sample, barcode);
×
100
    }
101
  }
102

103
  return m_cache.flush(chunk->eof, chunk->mate_separator);
×
104
}
105

106
///////////////////////////////////////////////////////////////////////////////
107

108
demultiplex_pe_reads::demultiplex_pe_reads(const userconfig& config,
×
109
                                           const post_demux_steps& steps,
110
                                           demux_stats_ptr stats)
×
111
  : demultiplex_reads(config, steps, std::move(stats))
×
112
{
113
}
114

115
chunk_vec
116
demultiplex_pe_reads::process(chunk_ptr chunk)
×
117
{
118
  AR_REQUIRE(chunk);
×
119
  AR_REQUIRE_SINGLE_THREAD(m_lock);
×
120
  AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());
×
121

122
  auto it_1 = chunk->reads_1.begin();
×
123
  auto it_2 = chunk->reads_2.begin();
×
124
  for (; it_1 != chunk->reads_1.end(); ++it_1, ++it_2) {
×
125
    const auto [sample, barcode] = m_barcode_table.identify(*it_1, *it_2);
×
126

127
    // TODO: We should keep reads even if we cannot identify the exact barcode
128
    if (sample < 0 || barcode < 0) {
×
129
      switch (sample) {
×
130
        case barcode_key::unidentified:
×
131
          m_statistics->unidentified += 2;
×
132
          break;
×
133
        case barcode_key::ambiguous:
×
134
          m_statistics->ambiguous += 2;
×
135
          break;
×
136
        default:
×
137
          AR_FAIL("invalid barcode match sample");
×
138
      }
139

140
      m_cache.add_unidentified_1(std::move(*it_1));
×
141
      m_cache.add_unidentified_2(std::move(*it_2));
×
142
    } else {
143
      it_1->truncate(m_barcode_table.length_1());
×
144
      m_cache.add_read_1(std::move(*it_1), sample, barcode);
×
145

146
      it_2->truncate(m_barcode_table.length_2());
×
147
      m_cache.add_read_2(std::move(*it_2), sample);
×
148

149
      m_statistics->samples.at(sample) += 2;
×
150
    }
151
  }
152

153
  return m_cache.flush(chunk->eof, chunk->mate_separator);
×
154
}
155

156
///////////////////////////////////////////////////////////////////////////////
157

158
process_demultiplexed::process_demultiplexed(const userconfig& config,
×
159
                                             const sample_output_files& output,
160
                                             const size_t sample,
161
                                             trim_stats_ptr sink)
×
162
  : analytical_step(processing_order::unordered, "process_demultiplexed")
163
  , m_config(config)
×
164
  , m_output(output)
×
165
  , m_samples(config.samples)
×
166
  , m_sample(sample)
×
167
  , m_stats_sink(std::move(sink))
×
168
{
169
  m_stats.emplace_back_n(m_config.max_threads, m_config.report_sample_rate);
×
170
}
171

172
chunk_vec
173
process_demultiplexed::process(chunk_ptr chunk)
×
174
{
175
  AR_REQUIRE(chunk);
×
176
  processed_reads chunks{ m_output };
×
177
  chunks.set_sample(m_samples.at(m_sample));
×
178
  chunks.set_mate_separator(chunk->mate_separator);
×
179
  chunks.set_demultiplexing_only(true);
×
180

181
  if (chunk->first) {
×
182
    chunks.write_headers(m_config.args);
×
183
  }
184

185
  auto stats = m_stats.acquire();
×
186

187
  AR_REQUIRE(chunk->reads_1.size() == chunk->barcodes.size());
×
188
  auto barcode = chunk->barcodes.begin();
×
189

190
  if (m_config.paired_ended_mode) {
×
191
    AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());
×
192

193
    auto it_1 = chunk->reads_1.begin();
×
194
    auto it_2 = chunk->reads_2.begin();
×
195
    for (; it_1 != chunk->reads_1.end(); ++it_1, ++it_2, ++barcode) {
×
196
      it_1->add_prefix_to_name(m_config.prefix_read_1);
×
197
      stats->read_1->process(*it_1);
×
198
      chunks.add(*it_1, read_type::mate_1, fastq_flags::pe_1, *barcode);
×
199

200
      it_2->add_prefix_to_name(m_config.prefix_read_2);
×
201
      stats->read_2->process(*it_2);
×
202
      chunks.add(*it_2, read_type::mate_2, fastq_flags::pe_2, *barcode);
×
203
    }
204
  } else {
205
    for (auto& read : chunk->reads_1) {
×
206
      read.add_prefix_to_name(m_config.prefix_read_1);
×
207

208
      stats->read_1->process(read);
×
209
      chunks.add(read, read_type::mate_1, fastq_flags::se, *barcode++);
×
210
    }
211
  }
212

213
  m_stats.release(stats);
×
214

215
  return chunks.finalize(chunk->eof);
×
216
}
217

218
void
219
process_demultiplexed::finalize()
×
220
{
221
  m_stats.merge_into(*m_stats_sink);
×
222
}
223

224
///////////////////////////////////////////////////////////////////////////////
225

226
processes_unidentified::processes_unidentified(const userconfig& config,
×
227
                                               const output_files& output,
228
                                               demux_stats_ptr stats)
×
229
  : analytical_step(processing_order::unordered, "processes_unidentified")
230
  , m_config(config)
×
231
  , m_statistics(std::move(stats))
×
232
{
233
  m_output.set_file(read_type::mate_1, output.unidentified_1);
×
234
  m_output.set_file(read_type::mate_2, output.unidentified_2);
×
235

236
  m_output.set_step(read_type::mate_1, output.unidentified_1_step);
×
237
  if (output.unidentified_1_step != output.unidentified_2_step &&
×
238
      output.unidentified_2_step != output_files::disabled) {
×
239
    m_output.set_step(read_type::mate_2, output.unidentified_2_step);
×
240
  }
241

242
  m_stats_1.emplace_back_n(m_config.max_threads, config.report_sample_rate);
×
243
  m_stats_2.emplace_back_n(m_config.max_threads, config.report_sample_rate);
×
244
}
245

246
chunk_vec
247
processes_unidentified::process(chunk_ptr chunk)
×
248
{
249
  AR_REQUIRE(chunk);
×
250
  processed_reads chunks{ m_output };
×
251
  chunks.set_sample(m_config.samples.unidentified());
×
252
  chunks.set_mate_separator(chunk->mate_separator);
×
253

254
  if (chunk->first) {
×
255
    chunks.write_headers(m_config.args);
×
256
  }
257

258
  auto stats_1 = m_stats_1.acquire();
×
259
  auto stats_2 = m_stats_2.acquire();
×
260

261
  if (m_config.paired_ended_mode) {
×
262
    AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());
×
263

264
    auto it_1 = chunk->reads_1.begin();
×
265
    auto it_2 = chunk->reads_2.begin();
×
266

267
    for (; it_1 != chunk->reads_1.end(); ++it_1, ++it_2) {
×
268
      it_1->add_prefix_to_name(m_config.prefix_read_1);
×
269
      stats_1->process(*it_1);
×
270
      chunks.add(*it_1, read_type::mate_1, fastq_flags::pe_1, 0);
×
271

272
      it_2->add_prefix_to_name(m_config.prefix_read_2);
×
273
      stats_2->process(*it_2);
×
274
      chunks.add(*it_2, read_type::mate_2, fastq_flags::pe_2, 0);
×
275
    }
276
  } else {
277
    for (auto& read : chunk->reads_1) {
×
278
      read.add_prefix_to_name(m_config.prefix_read_1);
×
279

280
      stats_1->process(read);
×
281
      chunks.add(read, read_type::mate_1, fastq_flags::se, 0);
×
282
    }
283
  }
284

285
  m_stats_1.release(stats_1);
×
286
  m_stats_2.release(stats_2);
×
287

288
  return chunks.finalize(chunk->eof);
×
289
}
290

291
void
292
processes_unidentified::finalize()
×
293
{
294
  m_stats_1.merge_into(*m_statistics->unidentified_stats_1);
×
295
  m_stats_2.merge_into(*m_statistics->unidentified_stats_2);
×
296
}
297

298
} // namespace adapterremoval
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc