• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #95

13 Apr 2025 06:28PM UTC coverage: 30.749%. Remained the same
#95

push

travis-ci

web-flow
rename enums read_type and fastq_flags (#121)

To clarify the usage of two commonly used enums, `read_type` was renamed
to `read_file`, since it is used to specify input and output file
types, while `fastq_flags` was renamed to `read_type`, since it is used
to differentiate between different pre/post-processed read types.

0 of 66 new or added lines in 6 files covered. (0.0%)

4 existing lines in 1 file now uncovered.

3157 of 10267 relevant lines covered (30.75%)

3943.28 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

6.72
/src/serializer.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2024 Mikkel Schubert <mikkelsch@gmail.com>
3
#include "serializer.hpp"  // for fastq_serializer
4
#include "buffer.hpp"      // for buffer
5
#include "commontypes.hpp" // for output_format
6
#include "debug.hpp"       // for AR_REQUIRE, AR_FAIL
7
#include "fastq.hpp"       // for fastq
8
#include "fastq_enc.hpp"   // for PHRED_OFFSET_MIN
9
#include "main.hpp"        // for VERSION
10
#include "strutils.hpp"    // for join_text
11
#include <string_view>     // for string_view
12

13
namespace adapterremoval {
14

15
class userconfig;
16

17
namespace {
18

19
//! Standard header for BAM files prior to compression
20
constexpr std::string_view BAM_HEADER{ "BAM\1", 4 };
21
//! Standard header for SAM/BAM files
22
constexpr std::string_view SAM_HEADER = "@HD\tVN:1.6\tSO:unsorted\n";
23

24
/**
25
 * Flags mapping onto SAM/BAM flags
26
 *
27
 * 0x1 = read paired
28
 * 0x4 = read unmapped
29
 * 0x8 = mate unmapped
30
 * 0x40 = mate 1
31
 * 0x80 = mate 2
32
 * 0x200 = failed QC
33
 */
34

35
std::string_view
NEW
36
flags_to_sam(read_type flags)
×
37
{
38
  switch (flags) {
×
NEW
39
    case read_type::se:
×
40
      return "4";
×
NEW
41
    case read_type::se_fail:
×
42
      return "516";
×
NEW
43
    case read_type::pe_1:
×
44
      return "77";
×
NEW
45
    case read_type::pe_1_fail:
×
46
      return "589";
×
NEW
47
    case read_type::pe_2:
×
48
      return "141";
×
NEW
49
    case read_type::pe_2_fail:
×
50
      return "653";
×
51
    default:
×
52
      AR_FAIL("invalid fastq flags");
×
53
  }
54
}
55

56
uint16_t
NEW
57
flags_to_bam(read_type flags)
×
58
{
59
  switch (flags) {
×
60
    case read_type::se:
61
      return 4;
62
    case read_type::se_fail:
63
      return 516;
64
    case read_type::pe_1:
65
      return 77;
66
    case read_type::pe_1_fail:
67
      return 589;
68
    case read_type::pe_2:
69
      return 141;
70
    case read_type::pe_2_fail:
71
      return 653;
72
    default:
×
73
      AR_FAIL("invalid fastq flags");
×
74
  }
75
}
76

77
void
78
sequence_to_bam(buffer& buf, const std::string& seq)
×
79
{
80
  const auto size = buf.size();
×
81

82
  uint8_t pair = 0;
×
83
  for (size_t i = 0; i < seq.length(); ++i) {
×
84
    pair = (pair << 4) | "\0\1\0\2\10\0\20\4"[seq[i] & 0x7];
×
85

86
    if (i % 2) {
×
87
      buf.append_u8(pair);
×
88
      pair = 0;
×
89
    }
90
  }
91

92
  if (seq.length() % 2) {
×
93
    buf.append_u8(pair << 4);
×
94
  }
95

96
  AR_REQUIRE(buf.size() - size == (seq.length() + 1) / 2);
×
97
}
98

99
void
100
qualities_to_bam(buffer& buf, const std::string& quals)
×
101
{
102
  for (const auto c : quals) {
×
103
    buf.append_u8(c - PHRED_OFFSET_MIN);
×
104
  }
105
}
106

107
std::string
108
create_sam_header(const string_vec& args, const sample& s)
×
109
{
110
  std::string header{ SAM_HEADER };
×
111

112
  // @RG
113
  for (const auto& it : s) {
×
114
    if (it.has_read_group) {
×
115
      header.append(it.read_group_.header());
×
116
      header.append("\n");
×
117
    }
118
  }
119

120
  // @PG
121
  header.append("@PG\tID:adapterremoval\tPN:adapterremoval\tCL:");
×
122
  header.append(join_text(args, " "));
×
123
  header.append("\tVN:");
×
124
  header.append(VERSION.substr(1)); // version without leading v
×
125
  header.append("\n");
×
126

127
  return header;
×
128
}
×
129

130
} // namespace
131

132
///////////////////////////////////////////////////////////////////////////////
133
// Implementations for `fastq_serializer`
134

135
void
136
fastq_serializer::header(buffer& /* buf */,
×
137
                         const string_vec& /* args */,
138
                         const sample& /* s */)
139
{
140
}
141

142
void
143
fastq_serializer::record(buffer& buf,
2✔
144
                         const fastq& record,
145
                         const sample_sequences& sequences,
146
                         const serializer_settings& settings)
147
{
148
  buf.append(record.header());
4✔
149
  if (settings.demultiplexing_only) {
2✔
150
    buf.append(" BC:");
×
151
    buf.append(sequences.barcode_1);
×
152
    if (sequences.barcode_2.length()) {
×
153
      buf.append_u8('-');
×
154
      buf.append(sequences.barcode_2);
×
155
    }
156
  }
157
  buf.append_u8('\n');
2✔
158
  buf.append(record.sequence());
4✔
159
  buf.append("\n+\n", 3);
2✔
160
  buf.append(record.qualities());
4✔
161
  buf.append_u8('\n');
2✔
162
}
2✔
163

164
///////////////////////////////////////////////////////////////////////////////
165
// Implementations for `sam_serializer`
166

167
void
168
sam_serializer::header(buffer& buf, const string_vec& args, const sample& s)
×
169
{
170
  buf.append(create_sam_header(args, s));
×
171
}
172

173
void
174
sam_serializer::record(buffer& buf,
×
175
                       const fastq& record,
176
                       const sample_sequences& sequences,
177
                       const serializer_settings& settings)
178
{
179
  buf.append(record.name(settings.mate_separator)); // 1. QNAME
×
180
  buf.append_u8('\t');
×
181
  buf.append(flags_to_sam(settings.flags)); // 2. FLAG
×
182
  buf.append("\t"
×
183
             "*\t" // 3. RNAME
184
             "0\t" // 4. POS
185
             "0\t" // 5. MAPQ
186
             "*\t" // 6. CIGAR
187
             "*\t" // 7. RNEXT
188
             "0\t" // 8. PNEXT
189
             "0\t" // 9. TLEN
190
  );
191
  if (record.length()) {
×
192
    buf.append(record.sequence()); // 10. SEQ
×
193
    buf.append_u8('\t');
×
194
    buf.append(record.qualities()); // 11. QUAL
×
195
  } else {
196
    buf.append("*\t" // 10. SEQ
×
197
               "*"   // 11. QUAL
198
    );
199
  }
200

201
  if (sequences.has_read_group) {
×
202
    buf.append("\tRG:Z:");
×
203
    buf.append(sequences.read_group_.id());
×
204
  }
205

206
  buf.append("\n");
×
207
}
208

209
///////////////////////////////////////////////////////////////////////////////
210
// Implementations for `bam_serializer`
211

212
void
213
bam_serializer::header(buffer& buf, const string_vec& args, const sample& s)
×
214
{
215
  const auto sam_header = create_sam_header(args, s);
×
216

217
  buf.append(BAM_HEADER);            // magic
×
218
  buf.append_u32(sam_header.size()); // l_text
×
219
  buf.append(sam_header);            // terminating NUL not required
×
220
  buf.append_u32(0);                 // n_ref
×
221
}
222

223
void
224
bam_serializer::record(buffer& buf,
×
225
                       const fastq& record,
226
                       const sample_sequences& sequences,
227
                       const serializer_settings& settings)
228
{
229
  const size_t block_size_pos = buf.size();
×
230
  buf.append_u32(0);  // block size (preliminary)
×
231
  buf.append_i32(-1); // refID
×
232
  buf.append_i32(-1); // pos
×
233

234
  const auto name = record.name(settings.mate_separator).substr(0, 255);
×
235
  buf.append_u8(name.length() + 1); // l_read_name
×
236
  buf.append_u8(0);                 // mapq
×
237
  buf.append_u16(4680);             // bin (c.f. specification 4.2.1)
×
238
  buf.append_u16(0);                // n_cigar
×
239
  buf.append_u16(flags_to_bam(settings.flags)); // flags
×
240

241
  buf.append_u32(record.length()); // l_seq
×
242
  buf.append_i32(-1);              // next_refID
×
243
  buf.append_i32(-1);              // next_pos
×
244
  buf.append_i32(0);               // tlen
×
245

246
  buf.append(name); // read_name + NUL terminator
×
247
  buf.append_u8(0);
×
248
  // no cigar operations
249
  sequence_to_bam(buf, record.sequence());
×
250
  qualities_to_bam(buf, record.qualities());
×
251

252
  if (sequences.has_read_group) {
×
253
    // RG:Z:${ID} tag
254
    buf.append("RGZ");
×
255
    buf.append(sequences.read_group_.id());
×
256
    buf.append_u8(0); // NUL
×
257
  }
258

259
  const size_t block_size = buf.size() - block_size_pos - 4;
×
260
  buf.put_u32(block_size_pos, block_size); // block size (final)
×
261
}
262

263
///////////////////////////////////////////////////////////////////////////////
264
// Implementations for `serializer`
265

266
/**
 * Selects the header and record writers matching the output format.
 *
 * The two FASTQ formats share one pair of writers, as do the two SAM formats
 * and the two BAM formats. Any other value triggers AR_FAIL.
 */
serializer::serializer(output_format format)
{
  const bool is_fastq =
    format == output_format::fastq || format == output_format::fastq_gzip;
  const bool is_sam =
    format == output_format::sam || format == output_format::sam_gzip;
  const bool is_bam =
    format == output_format::bam || format == output_format::ubam;

  if (is_fastq) {
    m_header = fastq_serializer::header;
    m_record = fastq_serializer::record;
  } else if (is_sam) {
    m_header = sam_serializer::header;
    m_record = sam_serializer::record;
  } else if (is_bam) {
    m_header = bam_serializer::header;
    m_record = bam_serializer::record;
  } else {
    AR_FAIL("invalid output format");
  }
}
288

289
void
290
serializer::header(buffer& buf, const string_vec& args) const
×
291
{
292
  m_header(buf, args, m_sample);
×
293
}
294

295
void
296
serializer::record(buffer& buf,
×
297
                   const fastq& record,
298
                   const read_type flags,
299
                   const size_t barcode) const
300
{
301
  m_record(
×
302
    buf,
303
    record,
304
    m_sample.at(barcode),
×
305
    serializer_settings{ flags, m_mate_separator, m_demultiplexing_only });
×
306
}
307

308
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc