• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #105

25 Apr 2025 08:50AM UTC coverage: 66.927% (-0.03%) from 66.961%
#105

push

travis-ci

web-flow
avoid undefined behavior when stringifying enums (#131)

It is not completely clear to me what is allowed, based on the spec,
so playing it safe

0 of 3 new or added lines in 2 files covered. (0.0%)

2 existing lines in 1 file now uncovered.

9691 of 14480 relevant lines covered (66.93%)

3053.83 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.5
/tests/unit/serializer_test.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2011 Stinus Lindgreen <stinus@binf.ku.dk>
3
// SPDX-FileCopyrightText: 2014 Mikkel Schubert <mikkelsch@gmail.com>
4
#include "buffer.hpp"      // for buffer
5
#include "commontypes.hpp" // for fastq_vec
6
#include "fastq.hpp"       // for fastq, fastq::ntrimmed, ACGTN, ACGT
7
#include "main.hpp"        // for VERSION
8
#include "read_group.hpp"  // for read_group
9
#include "sequence.hpp"    // for dna_sequence
10
#include "serializer.hpp"  // for serializer
11
#include "testing.hpp"     // for TEST_CASE, REQUIRE, ...
12
#include "utilities.hpp"   // for underlying_type
13
#include <string>          // for string
14
#include <string_view>     // for string_view
15

16
// Ignore nucleotide and quality strings
17
// spell-checker:ignoreRegExp /"[!-~]+"/g
18
// Ignore nucleotide comments
19
// spell-checker:ignoreRegExp /\W[acgtnACGTN]+\W/g
20

21
namespace adapterremoval {
22

23
namespace {
24

25
//! The program version with the leading 'v' removed; e.g. "3.0.1"
26
const std::string VERSION_NO_V{ VERSION.substr(1) };
27

28
constexpr std::string_view EXTREMELY_LONG_NAME =
29
  "123456789-123456789-123456789-123456789-123456789-123456789-123456789-"
30
  "123456789-123456789-123456789-123456789-123456789-123456789-123456789-"
31
  "123456789-123456789-123456789-123456789-123456789-123456789-123456789-"
32
  "123456789-123456789-123456789-123456789-1234";
33

34
// Basic named sample with barcodes, but otherwise no special properties
35
const sample BASIC_SAMPLE_WITH_BARCODES{ "foo",
36
                                         dna_sequence{ "ACGT" },
37
                                         dna_sequence{ "TGCA" },
38
                                         barcode_orientation::unspecified };
39

40
} // namespace
41

42
////////////////////////////////////////////////////////////////////////////////
43
// Implementation of enum debug serialization
44

45
std::ostream&
46
operator<<(std::ostream& os, const read_file& value)
×
47
{
48
  switch (value) {
×
49
    case read_file::mate_1:
×
50
      return os << "read_file::mate_1";
×
51
    case read_file::mate_2:
×
52
      return os << "read_file::mate_2";
×
53
    case read_file::merged:
×
54
      return os << "read_file::merged";
×
55
    case read_file::singleton:
×
56
      return os << "read_file::singleton";
×
57
    case read_file::discarded:
×
58
      return os << "read_file::discarded";
×
59
    case read_file::max:
×
60
    default:
×
NEW
61
      return os << "read_file{?}";
×
62
  }
63
}
64

65
///////////////////////////////////////////////////////////////////////////////
66
// FASTQ header serialization
67

68
TEST_CASE("Writing FASTQ header to buffer", "[serializer::fastq]")
{
  // Both FASTQ flavors are expected to produce identical buffer contents
  const auto format = GENERATE(output_format::fastq, output_format::fastq_gzip);
  serializer writer{ format };

  // Sample information is only used in the case of --demultiplex-only
  const bool with_sample = GENERATE(true, false);
  if (with_sample) {
    auto barcoded{ BASIC_SAMPLE_WITH_BARCODES };

    // Read-group information is not used for FASTQ records
    const bool with_read_group = GENERATE(true, false);
    if (with_read_group) {
      barcoded.set_read_group(read_group{ "SM:my-sample" });
    }

    writer.set_sample(barcoded);
  }

  buffer out;

  SECTION("header")
  {
    // since FASTQ has no header, arguments should not matter
    writer.header(out, { "adapterremoval3", "--blah" });
    REQUIRE(out == buffer{});
  }

  fastq read{ "record_1", "ACGTACGATA", "!$#$*68CGJ" };

  SECTION("basic record")
  {
    const read_meta meta{ read_type::se };
    writer.record(out, read, meta);
    REQUIRE(out == "@record_1\nACGTACGATA\n+\n!$#$*68CGJ\n"_buffer);
  }
}
48✔
101

102
///////////////////////////////////////////////////////////////////////////////
103
// FASTQ record serialization
104

105
TEST_CASE("Writing FASTQ records when only demultiplexing")
{
  sample barcoded{ BASIC_SAMPLE_WITH_BARCODES };
  // Read-group information is not used for FASTQ records
  const bool with_read_group = GENERATE(true, false);
  if (with_read_group) {
    barcoded.set_read_group(read_group{ "SM:my-sample" });
  }

  serializer writer{ output_format::fastq };
  writer.set_sample(barcoded);
  writer.set_demultiplexing_only(true);

  buffer out;
  fastq read{ "record_1", "ACGTACGATA", "!$#$*68CGJ" };
  const read_meta meta{ read_type::se };
  writer.record(out, read, meta);

  // The read header should include the barcodes when only demultiplexing
  REQUIRE(out == "@record_1 BC:ACGT-TGCA\nACGTACGATA\n+\n!$#$*68CGJ\n"_buffer);
}
10✔
124

125
TEST_CASE("Writing FASTQ with mate separator")
{
  fastq read{ "record_1/1", "ACGTACGATA", "!$#$*68CGJ" };

  serializer writer{ output_format::fastq };
  // This shouldn't matter, as mate separators are not removed for FASTQ reads
  const char mate_sep = GENERATE('\0', '/');
  writer.set_mate_separator(mate_sep);

  buffer out;
  const read_meta meta(read_type::pe_1);
  writer.record(out, read, meta);

  // The "/1" suffix is expected to survive in the written name
  REQUIRE(out == "@record_1/1\nACGTACGATA\n+\n!$#$*68CGJ\n"_buffer);
}
6✔
137

138
TEST_CASE("FASTQ is the same for all read and sub-formats")
{
  // Every read type is listed exactly once; repeating the list would only
  // re-run identical cases without adding coverage
  const auto type = GENERATE(read_type::se,
                             read_type::se_fail,
                             read_type::pe_1,
                             read_type::pe_1_fail,
                             read_type::pe_2,
                             read_type::pe_2_fail,
                             read_type::singleton_1,
                             read_type::singleton_2,
                             read_type::merged,
                             read_type::merged_fail);
  const auto format = GENERATE(output_format::fastq, output_format::fastq_gzip);

  buffer buf;
  fastq record{ "record_1", "ACGTACGATA", "!$#$*68CGJ" };

  const read_meta meta{ type };
  const serializer s{ format };
  s.record(buf, record, meta);

  // Neither the read type nor the FASTQ flavor may change the written record
  REQUIRE(buf == "@record_1\nACGTACGATA\n+\n!$#$*68CGJ\n"_buffer);
}
120✔
171

172
TEST_CASE("Writing empty FASTQ to buffer", "[serializer::fastq]")
{
  buffer out;
  fastq read{ "record_1", "", "" };

  serializer writer{ output_format::fastq };
  const read_meta meta(read_type::se);
  writer.record(out, read, meta);

  // Empty sequence and quality strings are written as empty lines
  REQUIRE(out == "@record_1\n\n+\n\n"_buffer);
}
3✔
182

183
TEST_CASE("Writing FASTQ with meta-data to buffer", "[serializer::fastq]")
{
  buffer out;
  fastq read{ "record_1 length=5", "ACGTA", "68CGJ" };

  serializer writer{ output_format::fastq };
  const read_meta meta(read_type::se);
  writer.record(out, read, meta);

  // The text following the read name is expected to be written unchanged
  REQUIRE(out == "@record_1 length=5\nACGTA\n+\n68CGJ\n"_buffer);
}
3✔
193

194
///////////////////////////////////////////////////////////////////////////////
195
// SAM header serialization
196

197
// NOTE(review): tagged "[serializer::fastq]" although this exercises SAM
// output -- confirm whether the tag is intentional
TEST_CASE("Writing SAM header to buffer", "[serializer::fastq]")
{
  buffer out;
  const auto format = GENERATE(output_format::sam, output_format::sam_gzip);
  serializer writer{ format };

  // The expected header is the same with and without a sample being set
  const bool with_sample = GENERATE(true, false);
  if (with_sample) {
    writer.set_sample(BASIC_SAMPLE_WITH_BARCODES);
  }

  writer.header(out, { "adapterremoval3", "--blah" });
  REQUIRE(out == "@HD\tVN:1.6\tSO:unsorted\n@PG\tID:adapterremoval\t"
                 "PN:adapterremoval\tCL:adapterremoval3 --blah\t"
                 "VN:3.0.0-alpha3\n"_buffer);
}
12✔
211

212
// NOTE(review): tagged "[serializer::fastq]" although this exercises SAM
// output -- confirm whether the tag is intentional
TEST_CASE("Writing SAM read-group header to buffer", "[serializer::fastq]")
{
  sample barcoded{ BASIC_SAMPLE_WITH_BARCODES };
  barcoded.set_read_group({});

  buffer out;
  const auto format = GENERATE(output_format::sam, output_format::sam_gzip);
  serializer writer{ format };
  writer.set_sample(barcoded);

  writer.header(out, { "adapterremoval3", "--blah" });

  // A single @RG line named after the sample is expected before @PG
  REQUIRE(out == "@HD\tVN:1.6\tSO:unsorted\n@RG\tID:foo\tSM:foo\t"
                 "BC:ACGT-TGCA\n@PG\tID:adapterremoval\t"
                 "PN:adapterremoval\tCL:adapterremoval3 --blah\t"
                 "VN:3.0.0-alpha3\n"_buffer);
}
8✔
227

228
// NOTE(review): tagged "[serializer::fastq]" although this exercises SAM
// output -- confirm whether the tag is intentional
TEST_CASE("Writing SAM read-group header to buffer with multiple barcodes",
          "[serializer::fastq]")
{
  sample barcoded{ BASIC_SAMPLE_WITH_BARCODES };
  barcoded.add_barcodes(dna_sequence{ "TTGG" },
                        dna_sequence{ "AGTT" },
                        barcode_orientation::unspecified);
  barcoded.set_read_group({});

  buffer out;
  const auto format = GENERATE(output_format::sam, output_format::sam_gzip);
  serializer writer{ format };
  writer.set_sample(barcoded);

  writer.header(out, { "adapterremoval3", "--blah" });

  // One @RG line per barcode pair, with IDs suffixed ".1" and ".2"
  REQUIRE(out == "@HD\tVN:1.6\tSO:unsorted\n@RG\tID:foo.1\tSM:foo\t"
                 "BC:ACGT-TGCA\n@RG\tID:foo.2\tSM:foo\tBC:TTGG-AGTT\n"
                 "@PG\tID:adapterremoval\tPN:adapterremoval\t"
                 "CL:adapterremoval3 --blah\tVN:3.0.0-alpha3\n"_buffer);
}
8✔
247

248
///////////////////////////////////////////////////////////////////////////////
249
// SAM record serialization
250

251
TEST_CASE("serialize SAM record without sample")
{
  buffer out;
  const auto format = GENERATE(output_format::sam, output_format::sam_gzip);
  serializer writer{ format };
  // The expected record is the same whether or not only demultiplexing
  const bool demux_only = GENERATE(true, false);
  writer.set_demultiplexing_only(demux_only);

  fastq read{ "record", "ACGTACGATA", "!$#$*68CGJ" };
  const read_meta meta{ read_type::se };
  writer.record(out, read, meta);

  // FLAG 4 = segment unmapped; no RG tag is written without a sample
  REQUIRE(out == "record\t4\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t"
                 "!$#$*68CGJ\n"_buffer);
}
16✔
263

264
TEST_CASE("serialize SAM record with sample")
{
  const auto format = GENERATE(output_format::sam, output_format::sam_gzip);
  serializer writer{ format };
  const bool demux_only = GENERATE(true, false);
  writer.set_demultiplexing_only(demux_only);

  sample barcoded{ BASIC_SAMPLE_WITH_BARCODES };
  barcoded.set_read_group(read_group{});
  writer.set_sample(barcoded);

  buffer out;
  fastq read{ "record", "ACGTACGATA", "!$#$*68CGJ" };
  const read_meta meta{ read_type::se };
  writer.record(out, read, meta);

  // The record is expected to end with an RG tag naming the sample
  REQUIRE(out == "record\t4\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t!$#$*68CGJ\t"
                 "RG:Z:foo\n"_buffer);
}
20✔
279

280
TEST_CASE("serialize SAM record with multiple barcodes")
{
  sample barcoded{ BASIC_SAMPLE_WITH_BARCODES };
  barcoded.add_barcodes(dna_sequence{ "TTGG" },
                        dna_sequence{ "AGTT" },
                        barcode_orientation::unspecified);
  barcoded.set_read_group({});

  buffer out;
  const auto format = GENERATE(output_format::sam, output_format::sam_gzip);
  serializer writer{ format };
  writer.set_sample(barcoded);

  fastq read{ "record", "ACGTACGATA", "!$#$*68CGJ" };

  SECTION("first/default barcodes")
  {
    // Without an explicit barcode index the first read group is used
    const read_meta meta{ read_type::se };
    writer.record(out, read, meta);
    REQUIRE(out == "record\t4\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t!$#$*68CGJ\t"
                   "RG:Z:foo.1\n"_buffer);
  }

  SECTION("second barcodes")
  {
    // Barcode index 1 selects the second read group
    const auto meta = read_meta{ read_type::se }.barcode(1);
    writer.record(out, read, meta);
    REQUIRE(out == "record\t4\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t!$#$*68CGJ\t"
                   "RG:Z:foo.2\n"_buffer);
  }
}
20✔
309

310
TEST_CASE("serialize SAM record with mate separator")
{
  buffer out;
  const auto format = GENERATE(output_format::sam, output_format::sam_gzip);
  serializer writer{ format };

  fastq read{ "record/1", "ACGTACGATA", "!$#$*68CGJ" };
  const read_meta meta{ read_type::se };

  SECTION("without mate sep")
  {
    // No separator configured: the "/1" suffix stays part of the name
    writer.record(out, read, meta);
    REQUIRE(out == "record/1\t4\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t"
                   "!$#$*68CGJ\n"_buffer);
  }

  SECTION("with mate sep")
  {
    // With '/' configured the mate suffix is stripped from the name
    writer.set_mate_separator('/');
    writer.record(out, read, meta);
    REQUIRE(out == "record\t4\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t"
                   "!$#$*68CGJ\n"_buffer);
  }
}
16✔
332

333
// Checks the SAM FLAG field written for each read type. FLAG bits per the
// SAM specification: 1 = paired, 4 = unmapped, 8 = mate unmapped,
// 64 = first segment, 128 = last segment, 512 = failed quality checks.
TEST_CASE("serialize read types for SAM")
{
  buffer out;
  fastq read{ "record", "ACGTACGATA", "!$#$*68CGJ" };
  const auto format = GENERATE(output_format::sam, output_format::sam_gzip);
  serializer writer{ format };

  SECTION("single end")
  {
    const auto type = GENERATE(read_type::se, read_type::merged);
    writer.record(out, read, read_meta{ type });
    // 4 = unmapped
    REQUIRE(out == "record\t4\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t"
                   "!$#$*68CGJ\n"_buffer);
  }

  SECTION("single end failed QC")
  {
    const auto type = GENERATE(read_type::se_fail, read_type::merged_fail);
    writer.record(out, read, read_meta{ type });
    // 516 = 4 + 512
    REQUIRE(out == "record\t516\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t"
                   "!$#$*68CGJ\n"_buffer);
  }

  SECTION("paired end mate 1, mate 2 passed/failed")
  {
    const auto type = GENERATE(read_type::pe_1, read_type::singleton_1);
    writer.record(out, read, read_meta{ type });
    // 77 = 1 + 4 + 8 + 64
    REQUIRE(out == "record\t77\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t"
                   "!$#$*68CGJ\n"_buffer);
  }

  SECTION("paired end mate 1 failed, mate 2 passed/failed")
  {
    writer.record(out, read, read_meta{ read_type::pe_1_fail });
    // 589 = 77 + 512
    REQUIRE(out == "record\t589\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t"
                   "!$#$*68CGJ\n"_buffer);
  }

  SECTION("paired end mate 2, mate 1 passed/failed")
  {
    const auto type = GENERATE(read_type::pe_2, read_type::singleton_2);
    writer.record(out, read, read_meta{ type });
    // 141 = 1 + 4 + 8 + 128
    REQUIRE(out == "record\t141\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t"
                   "!$#$*68CGJ\n"_buffer);
  }

  SECTION("paired end mate 2 failed, mate 1 passed/failed")
  {
    writer.record(out, read, read_meta{ read_type::pe_2_fail });
    // 653 = 141 + 512
    REQUIRE(out == "record\t653\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t"
                   "!$#$*68CGJ\n"_buffer);
  }
}
60✔
389

390
TEST_CASE("serialize empty SAM record")
{
  fastq read{ "record", "", "" };

  buffer out;
  const auto format = GENERATE(output_format::sam, output_format::sam_gzip);
  serializer writer{ format };
  const read_meta meta{ read_type::se };
  writer.record(out, read, meta);

  // Empty sequence and qualities are written as "*" placeholders
  REQUIRE(out == "record\t4\t*\t0\t0\t*\t*\t0\t0\t*\t*\n"_buffer);
}
6✔
400

401
TEST_CASE("serialize SAM record from FASTQ with meta-data")
{
  buffer out;
  const auto format = GENERATE(output_format::sam, output_format::sam_gzip);
  serializer writer{ format };

  fastq read{ "record length=NA", "ACGTACGATA", "!$#$*68CGJ" };
  const read_meta meta{ read_type::se };
  writer.record(out, read, meta);

  // The meta-data is (currently) not serialized
  REQUIRE(out == "record\t4\t*\t0\t0\t*\t*\t0\t0\tACGTACGATA\t"
                 "!$#$*68CGJ\n"_buffer);
}
8✔
413

414
///////////////////////////////////////////////////////////////////////////////
415
// BAM header serialization
416

417
// NOTE(review): tagged "[serializer::fastq]" although this exercises BAM
// output -- confirm whether the tag is intentional
TEST_CASE("Writing BAM header to buffer", "[serializer::fastq]")
{
  const auto format = GENERATE(output_format::bam, output_format::ubam);
  serializer writer{ format };

  // Sample information is only written if there is a read group, either
  // directly from the user or automatically assigned when demultiplexing
  const bool with_sample = GENERATE(true, false);
  if (with_sample) {
    sample barcoded{ BASIC_SAMPLE_WITH_BARCODES };
    writer.set_sample(barcoded);
  }

  buffer out;
  writer.header(out, { "adapterremoval3", "--blah" });

  // Layout: "BAM\1" magic, little-endian text length ('i' = 105), the SAM
  // header text, and finally a reference sequence count of zero
  REQUIRE(out == "BAM\x01i\x00\x00\x00@HD\tVN:1.6\tSO:unsorted\n"
                 "@PG\tID:adapterremoval\tPN:adapterremoval\t"
                 "CL:adapterremoval3 --blah\tVN:3.0.0-alpha3\n"
                 "\x00\x00\x00\x00"_buffer);
}
12✔
435

436
// NOTE(review): tagged "[serializer::fastq]" although this exercises BAM
// output -- confirm whether the tag is intentional
TEST_CASE("Writing BAM read-group header to buffer", "[serializer::fastq]")
{
  sample barcoded{ BASIC_SAMPLE_WITH_BARCODES };
  barcoded.set_read_group(read_group{ "LB:lib" });

  const auto format = GENERATE(output_format::bam, output_format::ubam);
  serializer writer{ format };
  writer.set_sample(barcoded);

  buffer out;
  writer.header(out, { "adapterremoval3", "--blah" });

  // The user-supplied LB tag is expected between the ID and SM tags
  REQUIRE(out == "BAM\x01\x8f\x00\x00\x00@HD\tVN:1.6\tSO:unsorted\n@RG\t"
                 "ID:foo\tLB:lib\tSM:foo\tBC:ACGT-TGCA\n@PG\t"
                 "ID:adapterremoval\tPN:adapterremoval\tCL:"
                 "adapterremoval3 --blah\tVN:3.0.0-alpha3\n"
                 "\x00\x00\x00\x00"_buffer);
}
8✔
453

454
// NOTE(review): tagged "[serializer::fastq]" although this exercises BAM
// output -- confirm whether the tag is intentional
TEST_CASE("Writing BAM read-group header to buffer with multiple barcodes",
          "[serializer::fastq]")
{
  sample barcoded{ BASIC_SAMPLE_WITH_BARCODES };
  barcoded.add_barcodes(dna_sequence{ "TTGG" },
                        dna_sequence{ "AGTT" },
                        barcode_orientation::unspecified);
  barcoded.set_read_group({});

  const auto format = GENERATE(output_format::bam, output_format::ubam);
  serializer writer{ format };
  writer.set_sample(barcoded);

  buffer out;
  writer.header(out, { "adapterremoval3", "--blah" });

  // One @RG line per barcode pair, with IDs suffixed ".1" and ".2"
  REQUIRE(out == "BAM\x01\xab\x00\x00\x00@HD\tVN:1.6\tSO:unsorted\n@RG\t"
                 "ID:foo.1\tSM:foo\tBC:ACGT-TGCA\n@RG\tID:foo.2\t"
                 "SM:foo\tBC:TTGG-AGTT\n@PG\tID:adapterremoval\t"
                 "PN:adapterremoval\tCL:adapterremoval3 "
                 "--blah\tVN:3.0.0-alpha3\n\x00\x00\x00\x00"_buffer);
}
8✔
474

475
////////////////////////////////////////////////////////////////////////////////
476
// BAM record serialization
477

478
TEST_CASE("serialize BAM record without sample")
{
  buffer out;
  const auto format = GENERATE(output_format::bam, output_format::ubam);
  serializer writer{ format };
  // The expected record is the same whether or not only demultiplexing
  const bool demux_only = GENERATE(true, false);
  writer.set_demultiplexing_only(demux_only);

  fastq read{ "record", "ACGTACGATA", "!$#$*68CGJ" };
  const read_meta meta{ read_type::se };
  writer.record(out, read, meta);

  // Block size '6' = 54 bytes: 32 fixed + 7 name (with NUL) + 5 packed
  // bases + 10 qualities; sequence is 4-bit packed, qualities are raw Phred
  REQUIRE(out == "6\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                 "\x00\x00\x04\x00\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                 "\xff\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02"
                 "\x03\t\x15\x17\"&)"_buffer);
}
16✔
492

493
TEST_CASE("serialize BAM record with uneven length sequence")
{
  buffer out;
  const auto format = GENERATE(output_format::bam, output_format::ubam);
  serializer writer{ format };
  const bool demux_only = GENERATE(true, false);
  writer.set_demultiplexing_only(demux_only);

  // Nine bases: the final packed byte holds a single base (0x80 = 'T')
  fastq read{ "record", "ACGTACGAT", "!$#$*68CG" };
  const read_meta meta{ read_type::se };
  writer.record(out, read, meta);

  // Block size '5' = 53 bytes: 32 fixed + 7 name + 5 packed bases + 9 quals
  REQUIRE(out == "5\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                 "\x00\x00\x04\x00\t\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                 "\xff\x00\x00\x00\x00record\x00\x12H\x12\x41\x80\x00\x03\x02"
                 "\x03\t\x15\x17\"&"_buffer);
}
16✔
507

508
TEST_CASE("serialize BAM record with sample")
{
  const auto format = GENERATE(output_format::bam, output_format::ubam);
  serializer writer{ format };
  const bool demux_only = GENERATE(true, false);
  writer.set_demultiplexing_only(demux_only);

  sample barcoded{ BASIC_SAMPLE_WITH_BARCODES };
  barcoded.set_read_group(read_group{});
  writer.set_sample(barcoded);

  buffer out;
  fastq read{ "record", "ACGTACGATA", "!$#$*68CGJ" };
  const read_meta meta{ read_type::se };
  writer.record(out, read, meta);

  // Block size '=' = 61 bytes: the 54 byte record plus the 7 byte
  // "RGZfoo\0" read-group tag
  REQUIRE(out == "=\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                 "\x00\x00\x04\x00\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                 "\xff\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02"
                 "\x03\t\x15\x17\"&)RGZfoo\x00"_buffer);
}
20✔
525

526
TEST_CASE("serialize BAM record with multiple barcodes")
{
  sample barcoded{ BASIC_SAMPLE_WITH_BARCODES };
  barcoded.add_barcodes(dna_sequence{ "TTGG" },
                        dna_sequence{ "AGTT" },
                        barcode_orientation::unspecified);
  barcoded.set_read_group({});

  buffer out;
  const auto format = GENERATE(output_format::bam, output_format::ubam);
  serializer writer{ format };
  writer.set_sample(barcoded);

  fastq read{ "record", "ACGTACGATA", "!$#$*68CGJ" };

  SECTION("first/default barcodes")
  {
    // Without an explicit barcode index the first read group is used
    const read_meta meta{ read_type::se };
    writer.record(out, read, meta);
    REQUIRE(out == "?\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                   "\x00\x00\x04\x00\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                   "\xff\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02"
                   "\x03\t\x15\x17\"&)RGZfoo.1\x00"_buffer);
  }

  SECTION("second barcodes")
  {
    // Barcode index 1 selects the second read group
    const auto meta = read_meta{ read_type::se }.barcode(1);
    writer.record(out, read, meta);
    REQUIRE(out == "?\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                   "\x00\x00\x04\x00\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                   "\xff\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02"
                   "\x03\t\x15\x17\"&)RGZfoo.2\x00"_buffer);
  }
}
20✔
559

560
TEST_CASE("serialize BAM record with mate separator")
{
  buffer out;
  const auto format = GENERATE(output_format::bam, output_format::ubam);
  serializer writer{ format };

  fastq read{ "record/1", "ACGTACGATA", "!$#$*68CGJ" };
  const read_meta meta{ read_type::se };

  SECTION("without mate sep")
  {
    // The name keeps its "/1" suffix: name length 9 ('\t'), block size '8'
    writer.record(out, read, meta);
    REQUIRE(out == "8\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\t\x00H\x12"
                   "\x00\x00\x04\x00\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                   "\xff\x00\x00\x00\x00record/1\x00\x12H\x12\x41\x81\x00\x03"
                   "\x02\x03\t\x15\x17\"&)"_buffer);
  }

  SECTION("with mate sep")
  {
    // With '/' configured the suffix is stripped: name length 7
    writer.set_mate_separator('/');
    writer.record(out, read, meta);
    REQUIRE(out == "6\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                   "\x00\x00\x04\x00\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                   "\xff\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02"
                   "\x03\t\x15\x17\"&)"_buffer);
  }
}
16✔
586

587
// Checks the little-endian 16 bit FLAG field written for each read type;
// the values mirror those of the SAM test case (4, 516, 77, 589, 141, 653).
TEST_CASE("serialize read types for BAM")
{
  buffer out;
  fastq read{ "record", "ACGTACGATA", "!$#$*68CGJ" };
  const auto format = GENERATE(output_format::bam, output_format::ubam);
  serializer writer{ format };

  SECTION("single end")
  {
    const auto type = GENERATE(read_type::se, read_type::merged);
    writer.record(out, read, read_meta{ type });
    // FLAG bytes 04 00 = 4
    REQUIRE(out == "6\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                   "\x00\x00\x04\x00\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                   "\xff\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02"
                   "\x03\t\x15\x17\"&)"_buffer);
  }

  SECTION("single end failed QC")
  {
    const auto type = GENERATE(read_type::se_fail, read_type::merged_fail);
    writer.record(out, read, read_meta{ type });
    // FLAG bytes 04 02 = 516
    REQUIRE(out == "6\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                   "\x00\x00\x04\x02\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                   "\xff\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02"
                   "\x03\t\x15\x17\"&)"_buffer);
  }

  SECTION("paired end mate 1, mate 2 passed/failed")
  {
    const auto type = GENERATE(read_type::pe_1, read_type::singleton_1);
    writer.record(out, read, read_meta{ type });
    // FLAG bytes 4d ('M') 00 = 77
    REQUIRE(out == "6\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                   "\x00\x00M\x00\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff"
                   "\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02\x03"
                   "\t\x15\x17\"&)"_buffer);
  }

  SECTION("paired end mate 1 failed, mate 2 passed/failed")
  {
    writer.record(out, read, read_meta{ read_type::pe_1_fail });
    // FLAG bytes 4d ('M') 02 = 589
    REQUIRE(out == "6\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                   "\x00\x00M\x02\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff"
                   "\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02\x03"
                   "\t\x15\x17\"&)"_buffer);
  }

  SECTION("paired end mate 2, mate 1 passed/failed")
  {
    const auto type = GENERATE(read_type::pe_2, read_type::singleton_2);
    writer.record(out, read, read_meta{ type });
    // FLAG bytes 8d 00 = 141
    REQUIRE(out == "6\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                   "\x00\x00\x8d\x00\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                   "\xff\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02"
                   "\x03\t\x15\x17\"&)"_buffer);
  }

  SECTION("paired end mate 2 failed, mate 1 passed/failed")
  {
    writer.record(out, read, read_meta{ read_type::pe_2_fail });
    // FLAG bytes 8d 02 = 653
    REQUIRE(out == "6\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                   "\x00\x00\x8d\x02\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                   "\xff\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02"
                   "\x03\t\x15\x17\"&)"_buffer);
  }
}
60✔
655

656
TEST_CASE("serialize empty BAM record")
{
  fastq read{ "record", "", "" };

  buffer out;
  const auto format = GENERATE(output_format::bam, output_format::ubam);
  serializer writer{ format };
  const read_meta meta{ read_type::se };
  writer.record(out, read, meta);

  // Block size '\'' = 39 bytes: 32 fixed + 7 name; sequence length is zero
  // and no sequence or quality bytes follow the name
  REQUIRE(out == "'\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                 "\x00\x00\x04\x00\x00\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                 "\xff\x00\x00\x00\x00record\x00"_buffer);
}
6✔
668

669
TEST_CASE("serialize BAM record from FASTQ with meta-data")
{
  buffer out;
  const auto format = GENERATE(output_format::bam, output_format::ubam);
  serializer writer{ format };

  fastq read{ "record length=NA", "ACGTACGATA", "!$#$*68CGJ" };
  const read_meta meta{ read_type::se };
  writer.record(out, read, meta);

  // The meta-data is (currently) not serialized
  REQUIRE(out == "6\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff\xff\x07\x00H\x12"
                 "\x00\x00\x04\x00\n\x00\x00\x00\xff\xff\xff\xff\xff\xff\xff"
                 "\xff\x00\x00\x00\x00record\x00\x12H\x12\x41\x81\x00\x03\x02"
                 "\x03\t\x15\x17\"&)"_buffer);
}
8✔
683

684
////////////////////////////////////////////////////////////////////////////////
685
// Common SAM/BAM tests
686

687
TEST_CASE("Serializing too long read names")
{
  // The maximum allowed read name length for SAM/BAM
  static_assert(EXTREMELY_LONG_NAME.size() == 254);

  const std::string longest_valid{ EXTREMELY_LONG_NAME };
  // One name at the limit and one that exceeds it by a single character
  fastq record_254{ longest_valid, "ACGT", "!!!!" };
  fastq record_255{ longest_valid + "5", "ACGT", "!!!!" };

  const read_meta meta{ read_type::se };
  buffer out;

  SECTION("fastq is always valid")
  {
    const auto format =
      GENERATE(output_format::fastq, output_format::fastq_gzip);
    serializer writer{ format };
    CHECK_NOTHROW(writer.record(out, record_254, meta));
    CHECK_NOTHROW(writer.record(out, record_255, meta));
  }

  SECTION("SAM/BAM allows only 254 characters")
  {
    const auto format = GENERATE(output_format::sam,
                                 output_format::sam_gzip,
                                 output_format::bam,
                                 output_format::ubam);
    serializer writer{ format };

    CHECK_NOTHROW(writer.record(out, record_254, meta));
    REQUIRE_THROWS_WITH(writer.record(out, record_255, meta),
                        Catch::Contains("Cannot encode read as SAM/BAM; read "
                                        "name is longer than 254 characters"));
  }
}
18✔
719

720
TEST_CASE("invalid read names")
{
  const read_meta meta{ read_type::se };
  // The name contains a tab character
  fastq read{ "invalid\tname", "ACGT", "!!!!" };
  buffer out;

  SECTION("FASTQ allows anything")
  {
    const auto format =
      GENERATE(output_format::fastq, output_format::fastq_gzip);
    serializer writer{ format };
    CHECK_NOTHROW(writer.record(out, read, meta));
  }

  SECTION("SAM/BAM limits valid characters")
  {
    const auto format = GENERATE(output_format::sam,
                                 output_format::sam_gzip,
                                 output_format::bam,
                                 output_format::ubam);
    serializer writer{ format };

    REQUIRE_THROWS_WITH(writer.record(out, read, meta),
                        Catch::Contains("Cannot encode read as SAM/BAM; read "
                                        "name contains characters other than "
                                        "the allowed"));
  }
}
12✔
745

746
////////////////////////////////////////////////////////////////////////////////
747
// Tests of read meta data
748

749
// Verifies which output file (read_file) each read type is routed to. Each
// section is named after the command-line option for that output file.
TEST_CASE("read type to file type mapping")
{
  SECTION("--out-file1")
  {
    read_meta meta{ GENERATE(read_type::se, read_type::pe_1) };
    CHECK(meta.get_file() == read_file::mate_1);
  }

  SECTION("--out-file2")
  {
    read_meta meta{ GENERATE(read_type::pe_2) };
    CHECK(meta.get_file() == read_file::mate_2);
  }

  SECTION("--out-singleton")
  {
    read_meta meta(GENERATE(read_type::singleton_1, read_type::singleton_2));
    CHECK(meta.get_file() == read_file::singleton);
  }

  // Previously mislabeled as a second "--out-singleton" section; merged
  // reads get their own, uniquely named section
  SECTION("--out-merged")
  {
    read_meta meta(read_type::merged);
    CHECK(meta.get_file() == read_file::merged);
  }

  SECTION("--out-discarded")
  {
    // Every failed read type is routed to the discarded file
    read_meta meta(GENERATE(read_type::se_fail,
                            read_type::pe_1_fail,
                            read_type::pe_2_fail,
                            read_type::merged_fail));
    CHECK(meta.get_file() == read_file::discarded);
  }
}
10✔
784

785
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc