• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #95

13 Apr 2025 06:28PM UTC coverage: 30.749%. Remained the same
#95

push

travis-ci

web-flow
rename enums read_type and fastq_flags (#121)

To clarify the usage of two commonly used enums, `read_type` was renamed
to `read_file`, since it is used to specify input and output file
types, while `fastq_flags` was renamed to `read_type`, since it is used
to differentiate between different pre/post-processed read types.

0 of 66 new or added lines in 6 files covered. (0.0%)

4 existing lines in 1 file now uncovered.

3157 of 10267 relevant lines covered (30.75%)

3943.28 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/output.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2024 Mikkel Schubert <mikkelsch@gmail.com>
3
#include "output.hpp"     // declarations
4
#include "buffer.hpp"     // for buffer
5
#include "debug.hpp"      // for AR_REQUIRE, AR_FAIL
6
#include "fastq.hpp"      // for fastq
7
#include "fastq_io.hpp"   // for write_fastq, split_fastq, gzip_split_fastq
8
#include "scheduler.hpp"  // for analytical_chunk
9
#include "serializer.hpp" // for fastq_serializer
10
#include "strutils.hpp"   // for ends_with
11
#include <limits>         // for numeric_limits
12

13
namespace adapterremoval {
14

15
class userconfig;
16

17
namespace {
18

19
// Returns the last buffer in `chunk`, appending an empty buffer first if the
// chunk does not contain any buffers yet. The chunk pointer must be non-null.
buffer&
get_buffer(chunk_ptr& chunk)
{
  AR_REQUIRE(chunk);

  auto& buffers = chunk->buffers;
  if (buffers.empty()) {
    buffers.emplace_back();
  }

  return buffers.back();
}
29

30
} // namespace
31

32
////////////////////////////////////////////////////////////////////////////////
33
// Implementations for `sample_output_files`
34

35
// Sentinel offset/step value marking an output that has not been assigned
const size_t sample_output_files::disabled{
  std::numeric_limits<size_t>::max()
};
36

37
size_t
38
add_write_step(scheduler& sch,
×
39
               const userconfig& config,
40
               const output_file& file)
41
{
42
  AR_REQUIRE(file.name != DEV_NULL);
×
43
  size_t step_id = sch.add<write_fastq>(config, file);
×
44

45
  switch (file.format) {
×
46
    case output_format::fastq:
47
    case output_format::sam:
48
      break;
49
    case output_format::fastq_gzip:
×
50
    case output_format::sam_gzip:
×
51
    case output_format::bam:
×
52
    case output_format::ubam: {
×
53
      step_id = sch.add<gzip_split_fastq>(config, file, step_id);
×
54
      step_id = sch.add<split_fastq>(config, file, step_id);
×
55
      break;
×
56
    }
57
    default:
×
58
      AR_FAIL("invalid output format");
×
59
  }
60

61
  return step_id;
×
62
}
63

64
// Initially no read file type is mapped to an output file
sample_output_files::sample_output_files()
{
  m_offsets.fill(disabled);
}
68

69
void
NEW
70
sample_output_files::set_file(const read_file rtype, output_file file)
×
71
{
72
  const auto index = static_cast<size_t>(rtype);
×
73
  AR_REQUIRE(m_offsets.at(index) == sample_output_files::disabled);
×
74

75
  // If the file type isn't being saved, then there is no need to process the
76
  // reads. This saves time especially when output compression is enabled.
77
  if (file.name != DEV_NULL) {
×
78
    // FIXME: This assumes that filesystem is case sensitive
79
    auto it = m_output.begin();
×
80
    for (; it != m_output.end(); ++it) {
×
81
      if (file.name == it->file.name) {
×
82
        break;
83
      }
84
    }
85

86
    if (it == m_output.end()) {
×
87
      m_output.push_back({ std::move(file), disabled });
×
88
      m_offsets.at(index) = m_output.size() - 1;
×
89
    } else {
90
      AR_REQUIRE(file == it->file);
×
91
      const auto existing_index = it - m_output.begin();
×
92
      m_offsets.at(index) = existing_index;
×
93
    }
94
  }
95
}
96

97
void
NEW
98
sample_output_files::set_step(read_file rtype, size_t step)
×
99
{
100
  const auto index = static_cast<size_t>(rtype);
×
101
  const auto offset = m_offsets.at(index);
×
102
  AR_REQUIRE(offset != disabled);
×
103

104
  AR_REQUIRE(m_output.at(offset).step == disabled);
×
105
  m_output.at(offset).step = step;
×
106
}
107

108
size_t
NEW
109
sample_output_files::offset(read_file value) const
×
110
{
111
  return m_offsets.at(static_cast<size_t>(value));
×
112
}
113

114
////////////////////////////////////////////////////////////////////////////////
115
// Implementations for `output_files`
116

117
// Sentinel step value; same value as `sample_output_files::disabled`
const size_t output_files::disabled = std::numeric_limits<size_t>::max();
118

119
bool
120
output_files::parse_format(std::string_view filename, output_format& sink)
×
121
{
122
  const std::string value = to_lower(std::string(filename));
×
123
  // ubam is tested separately, to avoid supporting non-standard file extensions
124
  if (value == "ubam") {
×
125
    sink = output_format::ubam;
×
126
    return true;
×
127
  }
128

129
  return parse_extension("." + value, sink);
×
130
}
131

132
bool
133
output_files::parse_extension(std::string_view filename, output_format& sink)
×
134
{
135
  const std::string value = to_lower(std::string(filename));
×
136
  if (ends_with(value, ".fq.gz") || ends_with(value, ".fastq.gz")) {
×
137
    sink = output_format::fastq_gzip;
×
138
  } else if (ends_with(value, ".fq") || ends_with(value, ".fastq")) {
×
139
    sink = output_format::fastq;
×
140
  } else if (ends_with(value, ".sam.gz")) {
×
141
    sink = output_format::sam_gzip;
×
142
  } else if (ends_with(value, ".sam")) {
×
143
    sink = output_format::sam;
×
144
  } else if (ends_with(value, ".bam")) {
×
145
    sink = output_format::bam;
×
146
  } else {
147
    return false;
148
  }
149

150
  return true;
151
}
152

153
std::string_view
154
output_files::file_extension(const output_format format)
×
155
{
156
  switch (format) {
×
157
    case output_format::fastq:
×
158
      return ".fastq";
×
159
    case output_format::fastq_gzip:
×
160
      return ".fastq.gz";
×
161
    case output_format::sam:
×
162
      return ".sam";
×
163
    case output_format::sam_gzip:
×
164
      return ".sam.gz";
×
165
    case output_format::bam:
×
166
    case output_format::ubam:
×
167
      return ".bam";
×
168
    default:
×
169
      AR_FAIL("invalid output format");
×
170
  }
171
}
172

173
void
174
output_files::add_write_steps(scheduler& sch, const userconfig& config)
×
175
{
176
  AR_REQUIRE(unidentified_1_step == disabled &&
×
177
             unidentified_2_step == disabled);
178

179
  for (auto& sample : m_samples) {
×
180
    for (auto& it : sample.m_output) {
×
181
      AR_REQUIRE(it.step == disabled);
×
182
      it.step = add_write_step(sch, config, it.file);
×
183
    }
184
  }
185

186
  if (unidentified_1.name != DEV_NULL) {
×
187
    unidentified_1_step = add_write_step(sch, config, unidentified_1);
×
188
  }
189

190
  if (unidentified_1.name == unidentified_2.name) {
×
191
    unidentified_2_step = unidentified_1_step;
×
192
  } else if (unidentified_2.name != DEV_NULL) {
×
193
    unidentified_2_step = add_write_step(sch, config, unidentified_2);
×
194
  }
195
}
196

197
////////////////////////////////////////////////////////////////////////////////
198
// Implementations for `processed_reads`
199

200
// Creates one (empty) chunk and one serializer per output in `map`, with each
// serializer constructed from the format of the corresponding output.
processed_reads::processed_reads(const sample_output_files& map)
  : m_map(map)
{
  const size_t outputs = map.size();

  for (size_t idx = 0; idx < outputs; ++idx) {
    m_chunks.emplace_back(std::make_unique<analytical_chunk>());
    m_serializers.emplace_back(map.format(idx));
  }
}
208

209
void
210
processed_reads::set_sample(const sample& value)
×
211
{
212
  for (auto& it : m_serializers) {
×
213
    it.set_sample(value);
×
214
  }
215
}
216

217
void
218
processed_reads::set_mate_separator(char value)
×
219
{
220
  m_mate_separator = value;
×
221
  for (auto& it : m_serializers) {
×
222
    it.set_mate_separator(value);
×
223
  }
224
}
225

226
void
227
processed_reads::set_demultiplexing_only(bool value)
×
228
{
229
  for (auto& it : m_serializers) {
×
230
    it.set_demultiplexing_only(value);
×
231
  }
232
}
233

234
void
235
processed_reads::write_headers(const string_vec& args)
×
236
{
237
  for (size_t i = 0; i < m_chunks.size(); ++i) {
×
238
    m_serializers.at(i).header(get_buffer(m_chunks.at(i)), args);
×
239
  }
240
}
241

242
void
243
processed_reads::add(const fastq& read,
×
244
                     const read_file type,
245
                     const read_type flags,
246
                     const size_t barcode)
247
{
248
  const size_t offset = m_map.offset(type);
×
249
  if (offset != sample_output_files::disabled) {
×
250
    auto& buffer = get_buffer(m_chunks.at(offset));
×
251
    m_serializers.at(offset).record(buffer, read, flags, barcode);
×
252
    m_chunks.at(offset)->reads++;
×
253
  }
254
}
255

256
// Stamps every chunk with the mate separator and the EOF flag and returns the
// chunks paired with the step of their output. The chunks are moved out of
// this object, leaving the stored chunk pointers in a moved-from state.
chunk_vec
processed_reads::finalize(bool eof)
{
  chunk_vec chunks;

  size_t output = 0;
  for (auto& chunk : m_chunks) {
    chunk->eof = eof;
    chunk->mate_separator = m_mate_separator;

    chunks.emplace_back(m_map.step(output), std::move(chunk));
    ++output;
  }

  return chunks;
}
×
271

272
///////////////////////////////////////////////////////////////////////////////
273
// Implementations for `post_demux_steps`
274

275
// Sentinel step value; shared with `output_files`
const size_t post_demux_steps::disabled{ output_files::disabled };
276

277
///////////////////////////////////////////////////////////////////////////////
278
// Implementations for `demultiplexed_reads`
279

280
namespace {
281

282
void
283
flush_chunk(chunk_vec& output,
×
284
            chunk_ptr& ptr,
285
            size_t step,
286
            const bool eof,
287
            const char mate_separator)
288
{
289
  if (eof || ptr->reads >= INPUT_READS) {
×
290
    ptr->eof = eof;
×
291
    ptr->mate_separator = mate_separator;
×
292
    output.emplace_back(step, std::move(ptr));
×
293
    ptr = std::make_unique<analytical_chunk>();
×
294
  }
295
}
296

297
} // namespace
298

299
// Creates the cache of per-sample chunks: one chunk for unidentified reads
// followed by one chunk per sample. Every sample must have been assigned a
// step.
demultiplexed_reads::demultiplexed_reads(const post_demux_steps& steps)
  : m_steps(steps)
{
  // The first chunks should have headers written depending on format
  const auto add_first_chunk = [this]() {
    auto chunk = std::make_unique<analytical_chunk>();
    chunk->first = true;
    m_cache.push_back(std::move(chunk));
  };

  // Position 0 is used for unidentified reads
  add_first_chunk();

  for (const auto next_step : m_steps.samples) {
    AR_REQUIRE(next_step != post_demux_steps::disabled);

    add_first_chunk();
  }
}
316

317
void
318
demultiplexed_reads::add_unidentified_1(fastq&& read)
×
319
{
320
  m_cache.at(0)->reads_1.push_back(std::move(read));
×
321
}
322

323
void
324
demultiplexed_reads::add_unidentified_2(fastq&& read)
×
325
{
326
  m_cache.at(0)->reads_2.push_back(std::move(read));
×
327
}
328

329
void
330
demultiplexed_reads::add_read_1(fastq&& read, size_t sample, size_t barcode)
×
331
{
332
  auto& chunk = *m_cache.at(sample + 1);
×
333
  chunk.reads_1.push_back(std::move(read));
×
334
  chunk.barcodes.push_back(barcode);
×
335
}
336

337
void
338
demultiplexed_reads::add_read_2(fastq&& read, size_t sample)
×
339
{
340
  m_cache.at(sample + 1)->reads_2.push_back(std::move(read));
×
341
}
342

343
// Returns the cached chunks that are ready for further processing, paired with
// the steps they should be sent to; see `flush_chunk` for the criteria. Chunks
// that are returned are replaced with empty chunks in the cache.
chunk_vec
demultiplexed_reads::flush(bool eof, char mate_separator)
{
  chunk_vec output;

  // Unidentified reads; these are treated as a pseudo-sample for simplicity
  flush_chunk(output, m_cache.at(0), m_steps.unidentified, eof, mate_separator);

  // The chunk for sample N is stored at position N + 1 in the cache
  for (size_t sample = 0; sample + 1 < m_cache.size(); ++sample) {
    flush_chunk(output,
                m_cache.at(sample + 1),
                m_steps.samples.at(sample),
                eof,
                mate_separator);
  }

  return output;
}
×
361

362
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc