• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #117

25 May 2025 03:01PM UTC coverage: 66.932% (-0.07%) from 67.006%
#117

push

travis-ci

web-flow
iwyu and reduce build-time inter-dependencies (#144)

26 of 145 new or added lines in 20 files covered. (17.93%)

89 existing lines in 5 files now uncovered.

9738 of 14549 relevant lines covered (66.93%)

3041.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/output.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2024 Mikkel Schubert <mikkelsch@gmail.com>
3
#include "output.hpp"     // declarations
4
#include "buffer.hpp"     // for buffer
5
#include "debug.hpp"      // for AR_REQUIRE, AR_FAIL
6
#include "fastq.hpp"      // for fastq
7
#include "fastq_io.hpp"   // for write_fastq, split_fastq, gzip_split_fastq
8
#include "scheduler.hpp"  // for analytical_chunk
9
#include "serializer.hpp" // for read_meta
10
#include "strutils.hpp"   // for ends_with
11
#include <limits>         // for numeric_limits
12

13
namespace adapterremoval {
14

15
class userconfig;
16

17
namespace {
18

19
// Returns the last buffer of `chunk`; an empty buffer is appended first if the
// chunk does not contain any buffers. The chunk pointer must not be null.
buffer&
get_buffer(chunk_ptr& chunk)
{
  AR_REQUIRE(chunk);

  auto& available = chunk->buffers;
  if (available.empty()) {
    available.emplace_back();
  }

  return available.back();
}
29

30
} // namespace
31

32
////////////////////////////////////////////////////////////////////////////////
33
// Implementations for `sample_output_files`
34

35
// Sentinel value (the largest `size_t`) used for offsets and steps that have
// not been assigned; see the constructor and `set_file` below
const size_t sample_output_files::disabled = std::numeric_limits<size_t>::max();
36

37
size_t
38
add_write_step(scheduler& sch,
×
39
               const userconfig& config,
40
               const output_file& file)
41
{
42
  AR_REQUIRE(file.name != DEV_NULL);
×
43
  size_t step_id = sch.add<write_fastq>(config, file);
×
44

45
  switch (file.format) {
×
46
    case output_format::fastq:
47
    case output_format::sam:
48
      break;
49
    case output_format::fastq_gzip:
×
50
    case output_format::sam_gzip:
×
51
    case output_format::bam:
×
52
    case output_format::ubam: {
×
53
      step_id = sch.add<gzip_split_fastq>(config, file, step_id);
×
54
      step_id = sch.add<split_fastq>(config, file, step_id);
×
55
      break;
×
56
    }
57
    default:
×
58
      AR_FAIL("invalid output format");
×
59
  }
60

61
  return step_id;
×
62
}
63

64
// Initially no read type is mapped to an output file
sample_output_files::sample_output_files()
{
  m_offsets.fill(disabled);
}
68

69
void
70
sample_output_files::set_file(const read_file rtype, output_file file)
×
71
{
72
  const auto index = static_cast<size_t>(rtype);
×
73
  AR_REQUIRE(m_offsets.at(index) == sample_output_files::disabled);
×
74

75
  // If the file type isn't being saved, then there is no need to process the
76
  // reads. This saves time especially when output compression is enabled.
77
  if (file.name != DEV_NULL) {
×
78
    // FIXME: This assumes that filesystem is case sensitive
79
    auto it = m_output.begin();
×
80
    for (; it != m_output.end(); ++it) {
×
81
      if (file.name == it->file.name) {
×
82
        break;
83
      }
84
    }
85

86
    if (it == m_output.end()) {
×
87
      m_output.push_back({ std::move(file), disabled });
×
88
      m_offsets.at(index) = m_output.size() - 1;
×
89
    } else {
90
      AR_REQUIRE(file == it->file);
×
91
      const auto existing_index = it - m_output.begin();
×
92
      m_offsets.at(index) = existing_index;
×
93
    }
94
  }
95
}
96

97
void
98
sample_output_files::set_step(read_file rtype, size_t step)
×
99
{
100
  const auto index = static_cast<size_t>(rtype);
×
101
  const auto offset = m_offsets.at(index);
×
102
  AR_REQUIRE(offset != disabled);
×
103

104
  AR_REQUIRE(m_output.at(offset).step == disabled);
×
105
  m_output.at(offset).step = step;
×
106
}
107

108
size_t
109
sample_output_files::offset(read_file value) const
×
110
{
111
  return m_offsets.at(static_cast<size_t>(value));
×
112
}
113

114
////////////////////////////////////////////////////////////////////////////////
115
// Implementations for `output_files`
116

117
// Sentinel value (the largest `size_t`) for steps that have not been added
const size_t output_files::disabled = static_cast<size_t>(-1);
118

119
bool
120
output_files::parse_format(std::string_view filename, output_format& sink)
×
121
{
122
  const std::string value = to_lower(std::string(filename));
×
123
  // ubam is tested separately, to avoid supporting non-standard file extensions
124
  if (value == "ubam") {
×
125
    sink = output_format::ubam;
×
126
    return true;
×
127
  }
128

129
  return parse_extension("." + value, sink);
×
130
}
131

132
bool
133
output_files::parse_extension(std::string_view filename, output_format& sink)
×
134
{
135
  const std::string value = to_lower(std::string(filename));
×
136
  if (ends_with(value, ".fq.gz") || ends_with(value, ".fastq.gz")) {
×
137
    sink = output_format::fastq_gzip;
×
138
  } else if (ends_with(value, ".fq") || ends_with(value, ".fastq")) {
×
139
    sink = output_format::fastq;
×
140
  } else if (ends_with(value, ".sam.gz")) {
×
141
    sink = output_format::sam_gzip;
×
142
  } else if (ends_with(value, ".sam")) {
×
143
    sink = output_format::sam;
×
144
  } else if (ends_with(value, ".bam")) {
×
145
    sink = output_format::bam;
×
146
  } else {
147
    return false;
148
  }
149

150
  return true;
151
}
152

153
std::string_view
154
output_files::file_extension(const output_format format)
×
155
{
156
  switch (format) {
×
157
    case output_format::fastq:
×
158
      return ".fastq";
×
159
    case output_format::fastq_gzip:
×
160
      return ".fastq.gz";
×
161
    case output_format::sam:
×
162
      return ".sam";
×
163
    case output_format::sam_gzip:
×
164
      return ".sam.gz";
×
165
    case output_format::bam:
×
166
    case output_format::ubam:
×
167
      return ".bam";
×
168
    default:
×
169
      AR_FAIL("invalid output format");
×
170
  }
171
}
172

173
void
174
output_files::add_write_steps(scheduler& sch, const userconfig& config)
×
175
{
176
  AR_REQUIRE(unidentified_1_step == disabled &&
×
177
             unidentified_2_step == disabled);
178

179
  for (auto& sample : m_samples) {
×
180
    for (auto& it : sample.m_output) {
×
181
      AR_REQUIRE(it.step == disabled);
×
182
      it.step = add_write_step(sch, config, it.file);
×
183
    }
184
  }
185

186
  if (unidentified_1.name != DEV_NULL) {
×
187
    unidentified_1_step = add_write_step(sch, config, unidentified_1);
×
188
  }
189

190
  if (unidentified_1.name == unidentified_2.name) {
×
191
    unidentified_2_step = unidentified_1_step;
×
192
  } else if (unidentified_2.name != DEV_NULL) {
×
193
    unidentified_2_step = add_write_step(sch, config, unidentified_2);
×
194
  }
195
}
196

197
////////////////////////////////////////////////////////////////////////////////
198
// Implementations for `processed_reads`
199

200
// Creates one (empty) chunk and one serializer, using the format reported by
// `map`, for each of the `map.size()` entries in `map`
processed_reads::processed_reads(const sample_output_files& map)
  : m_map(map)
{
  const size_t count = map.size();

  for (size_t idx = 0; idx < count; ++idx) {
    m_chunks.emplace_back(std::make_unique<analytical_chunk>());
    m_serializers.emplace_back(map.format(idx));
  }
}
208

NEW
209
// Defaulted destructor, defined out-of-line in this file; NOTE(review):
// presumably so that the types of members only need to be complete here
processed_reads::~processed_reads() = default;
×
210

211
void
212
processed_reads::set_sample(const sample& value)
×
213
{
214
  for (auto& it : m_serializers) {
×
215
    it.set_sample(value);
×
216
  }
217
}
218

219
void
220
processed_reads::set_mate_separator(char value)
×
221
{
222
  m_mate_separator = value;
×
223
  for (auto& it : m_serializers) {
×
224
    it.set_mate_separator(value);
×
225
  }
226
}
227

228
void
229
processed_reads::set_demultiplexing_only(bool value)
×
230
{
231
  for (auto& it : m_serializers) {
×
232
    it.set_demultiplexing_only(value);
×
233
  }
234
}
235

236
void
237
processed_reads::write_headers(const string_vec& args)
×
238
{
239
  for (size_t i = 0; i < m_chunks.size(); ++i) {
×
240
    m_serializers.at(i).header(get_buffer(m_chunks.at(i)), args);
×
241
  }
242
}
243

244
void
NEW
245
processed_reads::add(const fastq& read, read_type flags, size_t barcode)
×
246
{
NEW
247
  add(read, read_meta(flags).barcode(barcode));
×
248
}
249

250
void
251
processed_reads::add(const fastq& read, const read_meta& meta)
×
252
{
253
  const size_t offset = m_map.offset(meta.get_file());
×
254
  if (offset != sample_output_files::disabled) {
×
255
    auto& buffer = get_buffer(m_chunks.at(offset));
×
256
    m_serializers.at(offset).record(buffer, read, meta);
×
257
    m_chunks.at(offset)->reads++;
×
258
  }
259
}
260

261
// Sets the mate separator and EOF flag on every chunk and moves the chunks
// into the returned vector, each paired with the step that `m_map` reports for
// its position. The pointers remaining in `m_chunks` are moved-from.
chunk_vec
processed_reads::finalize(bool eof)
{
  chunk_vec chunks;

  size_t position = 0;
  for (auto& chunk : m_chunks) {
    chunk->mate_separator = m_mate_separator;
    chunk->eof = eof;

    chunks.emplace_back(m_map.step(position), std::move(chunk));
    ++position;
  }

  return chunks;
}
×
276

277
///////////////////////////////////////////////////////////////////////////////
278
// Implementations for `post_demux_steps`
279

280
// Uses the same sentinel value as `output_files::disabled`
const size_t post_demux_steps::disabled = output_files::disabled;
281

282
///////////////////////////////////////////////////////////////////////////////
283
// Implementations for `demultiplexed_reads`
284

285
namespace {
286

287
void
288
flush_chunk(chunk_vec& output,
×
289
            chunk_ptr& ptr,
290
            size_t step,
291
            const bool eof,
292
            const char mate_separator)
293
{
294
  if (eof || ptr->reads >= INPUT_READS) {
×
295
    ptr->eof = eof;
×
296
    ptr->mate_separator = mate_separator;
×
297
    output.emplace_back(step, std::move(ptr));
×
298
    ptr = std::make_unique<analytical_chunk>();
×
299
  }
300
}
301

302
} // namespace
303

304
// Creates a cache containing one chunk for unidentified reads followed by one
// chunk per sample in `steps`; every sample must have an enabled step
demultiplexed_reads::demultiplexed_reads(const post_demux_steps& steps)
  : m_steps(steps)
{
  // The first chunks should have headers written depending on format
  const auto make_first_chunk = [] {
    auto chunk = std::make_unique<analytical_chunk>();
    chunk->first = true;
    return chunk;
  };

  // Position 0 is used for unidentified reads
  m_cache.push_back(make_first_chunk());

  for (const auto next_step : m_steps.samples) {
    AR_REQUIRE(next_step != post_demux_steps::disabled);

    m_cache.push_back(make_first_chunk());
  }
}
321

322
void
323
demultiplexed_reads::add_unidentified_1(fastq&& read)
×
324
{
325
  m_cache.at(0)->reads_1.push_back(std::move(read));
×
326
}
327

328
void
329
demultiplexed_reads::add_unidentified_2(fastq&& read)
×
330
{
331
  m_cache.at(0)->reads_2.push_back(std::move(read));
×
332
}
333

334
void
335
demultiplexed_reads::add_read_1(fastq&& read, size_t sample, size_t barcode)
×
336
{
337
  auto& chunk = *m_cache.at(sample + 1);
×
338
  chunk.reads_1.push_back(std::move(read));
×
339
  chunk.barcodes.push_back(barcode);
×
340
}
341

342
void
343
demultiplexed_reads::add_read_2(fastq&& read, size_t sample)
×
344
{
345
  m_cache.at(sample + 1)->reads_2.push_back(std::move(read));
×
346
}
347

348
// Returns the cached chunks that are ready to be processed, each paired with
// its next step; see `flush_chunk` for the criteria. Flushed chunks are
// replaced with empty chunks in the cache.
chunk_vec
demultiplexed_reads::flush(bool eof, char mate_separator)
{
  chunk_vec output;

  // Unidentified reads; these are treated as a pseudo-sample for simplicity
  flush_chunk(output, m_cache.at(0), m_steps.unidentified, eof, mate_separator);

  // The chunk for sample N is located at position N + 1 in the cache
  for (size_t sample = 0; sample + 1 < m_cache.size(); ++sample) {
    flush_chunk(output,
                m_cache.at(sample + 1),
                m_steps.samples.at(sample),
                eof,
                mate_separator);
  }

  return output;
}
×
366

367
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc