• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #64

16 Mar 2025 04:40PM UTC coverage: 27.111% (-0.04%) from 27.151%
#64

push

travis-ci

MikkelSchubert
add basic regression tests for SAM output

2597 of 9579 relevant lines covered (27.11%)

4267.51 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

6.72
/src/serializer.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2024 by Mikkel Schubert - mikkelsch@gmail.com           *
5
 *                                                                       *
6
 * This program is free software: you can redistribute it and/or modify  *
7
 * it under the terms of the GNU General Public License as published by  *
8
 * the Free Software Foundation, either version 3 of the License, or     *
9
 * (at your option) any later version.                                   *
10
 *                                                                       *
11
 * This program is distributed in the hope that it will be useful,       *
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14
 * GNU General Public License for more details.                          *
15
 *                                                                       *
16
 * You should have received a copy of the GNU General Public License     *
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
18
\*************************************************************************/
19
#include "serializer.hpp"  // for fastq_serializer
20
#include "buffer.hpp"      // for buffer
21
#include "commontypes.hpp" // for output_format
22
#include "debug.hpp"       // for AR_REQUIRE, AR_FAIL
23
#include "fastq.hpp"       // for fastq
24
#include "fastq_enc.hpp"   // for PHRED_OFFSET_MIN
25
#include "main.hpp"        // for VERSION
26
#include "strutils.hpp"    // for join_text
27
#include <string_view>     // for string_view
28

29
namespace adapterremoval {
30

31
class userconfig;
32

33
namespace {
34

35
//! Standard header for BAM files prior to compression
36
constexpr std::string_view BAM_HEADER{ "BAM\1", 4 };
37
//! Standard header for SAM/BAM files
38
constexpr std::string_view SAM_HEADER = "@HD\tVN:1.6\tSO:unsorted\n";
39

40
/**
41
 * Flags mapping onto SAM/BAM flags
42
 *
43
 * 0x1 = read paired
44
 * 0x4 = read unmapped
45
 * 0x8 = mate unmapped
46
 * 0x40 = mate 1
47
 * 0x80 = mate 2
48
 * 0x200 = failed QC
49
 */
50

51
std::string_view
52
flags_to_sam(fastq_flags flags)
×
53
{
54
  switch (flags) {
×
55
    case fastq_flags::se:
×
56
      return "4";
×
57
    case fastq_flags::se_fail:
×
58
      return "516";
×
59
    case fastq_flags::pe_1:
×
60
      return "77";
×
61
    case fastq_flags::pe_1_fail:
×
62
      return "589";
×
63
    case fastq_flags::pe_2:
×
64
      return "141";
×
65
    case fastq_flags::pe_2_fail:
×
66
      return "653";
×
67
    default:
×
68
      AR_FAIL("invalid fastq flags");
×
69
  }
70
}
71

72
uint16_t
73
flags_to_bam(fastq_flags flags)
×
74
{
75
  switch (flags) {
×
76
    case fastq_flags::se:
77
      return 4;
78
    case fastq_flags::se_fail:
79
      return 516;
80
    case fastq_flags::pe_1:
81
      return 77;
82
    case fastq_flags::pe_1_fail:
83
      return 589;
84
    case fastq_flags::pe_2:
85
      return 141;
86
    case fastq_flags::pe_2_fail:
87
      return 653;
88
    default:
×
89
      AR_FAIL("invalid fastq flags");
×
90
  }
91
}
92

93
void
94
sequence_to_bam(buffer& buf, const std::string& seq)
×
95
{
96
  const auto size = buf.size();
×
97

98
  uint8_t pair = 0;
×
99
  for (size_t i = 0; i < seq.length(); ++i) {
×
100
    pair = (pair << 4) | "\0\1\0\2\10\0\20\4"[seq[i] & 0x7];
×
101

102
    if (i % 2) {
×
103
      buf.append_u8(pair);
×
104
      pair = 0;
×
105
    }
106
  }
107

108
  if (seq.length() % 2) {
×
109
    buf.append_u8(pair << 4);
×
110
  }
111

112
  AR_REQUIRE(buf.size() - size == (seq.length() + 1) / 2);
×
113
}
114

115
void
116
qualities_to_bam(buffer& buf, const std::string& quals)
×
117
{
118
  for (const auto c : quals) {
×
119
    buf.append_u8(c - PHRED_OFFSET_MIN);
×
120
  }
121
}
122

123
std::string
124
create_sam_header(const string_vec& args, const sample& s)
×
125
{
126
  std::string header{ SAM_HEADER };
×
127

128
  // @RG
129
  for (const auto& it : s) {
×
130
    if (it.has_read_group) {
×
131
      header.append(it.info.header());
×
132
      header.append("\n");
×
133
    }
134
  }
135

136
  // @PG
137
  header.append("@PG\tID:adapterremoval\tPN:adapterremoval\tCL:");
×
138
  header.append(join_text(args, " "));
×
139
  header.append("\tVN:");
×
140
  header.append(VERSION.substr(1)); // version without leading v
×
141
  header.append("\n");
×
142

143
  return header;
×
144
}
×
145

146
} // namespace
147

148
///////////////////////////////////////////////////////////////////////////////
149
// Implementations for `fastq_serializer`
150

151
void
152
fastq_serializer::header(buffer& /* buf */,
×
153
                         const string_vec& /* args */,
154
                         const sample& /* s */)
155
{
156
}
157

158
void
159
fastq_serializer::record(buffer& buf,
2✔
160
                         const fastq& record,
161
                         const sample_sequences& sequences,
162
                         const serializer_settings& settings)
163
{
164
  buf.append(record.header());
6✔
165
  if (settings.demultiplexing_only) {
2✔
166
    buf.append(" BC:");
×
167
    buf.append(sequences.barcode_1);
×
168
    if (sequences.barcode_2.length()) {
×
169
      buf.append_u8('-');
×
170
      buf.append(sequences.barcode_2);
×
171
    }
172
  }
173
  buf.append_u8('\n');
2✔
174
  buf.append(record.sequence());
6✔
175
  buf.append("\n+\n", 3);
2✔
176
  buf.append(record.qualities());
6✔
177
  buf.append_u8('\n');
2✔
178
}
2✔
179

180
///////////////////////////////////////////////////////////////////////////////
181
// Implementations for `sam_serializer`
182

183
void
184
sam_serializer::header(buffer& buf, const string_vec& args, const sample& s)
×
185
{
186
  buf.append(create_sam_header(args, s));
×
187
}
188

189
void
190
sam_serializer::record(buffer& buf,
×
191
                       const fastq& record,
192
                       const sample_sequences& sequences,
193
                       const serializer_settings& settings)
194
{
195
  buf.append(record.name(settings.mate_separator)); // 1. QNAME
×
196
  buf.append_u8('\t');
×
197
  buf.append(flags_to_sam(settings.flags)); // 2. FLAG
×
198
  buf.append("\t"
×
199
             "*\t" // 3. RNAME
200
             "0\t" // 4. POS
201
             "0\t" // 5. MAPQ
202
             "*\t" // 6. CIGAR
203
             "*\t" // 7. RNEXT
204
             "0\t" // 8. PNEXT
205
             "0\t" // 9. TLEN
206
  );
207
  if (record.length()) {
×
208
    buf.append(record.sequence()); // 10. SEQ
×
209
    buf.append_u8('\t');
×
210
    buf.append(record.qualities()); // 11. QUAL
×
211
  } else {
212
    buf.append("*\t" // 10. SEQ
×
213
               "*"   // 11. QUAL
214
    );
215
  }
216

217
  if (sequences.has_read_group) {
×
218
    buf.append("\tRG:Z:");
×
219
    buf.append(sequences.info.id());
×
220
  }
221

222
  buf.append("\n");
×
223
}
224

225
///////////////////////////////////////////////////////////////////////////////
226
// Implementations for `bam_serializer`
227

228
void
229
bam_serializer::header(buffer& buf, const string_vec& args, const sample& s)
×
230
{
231
  const auto sam_header = create_sam_header(args, s);
×
232

233
  buf.append(BAM_HEADER);            // magic
×
234
  buf.append_u32(sam_header.size()); // l_text
×
235
  buf.append(sam_header);            // terminating NUL not required
×
236
  buf.append_u32(0);                 // n_ref
×
237
}
238

239
void
240
bam_serializer::record(buffer& buf,
×
241
                       const fastq& record,
242
                       const sample_sequences& sequences,
243
                       const serializer_settings& settings)
244
{
245
  const size_t block_size_pos = buf.size();
×
246
  buf.append_u32(0);  // block size (preliminary)
×
247
  buf.append_i32(-1); // refID
×
248
  buf.append_i32(-1); // pos
×
249

250
  const auto name = record.name(settings.mate_separator).substr(0, 255);
×
251
  buf.append_u8(name.length() + 1); // l_read_name
×
252
  buf.append_u8(0);                 // mapq
×
253
  buf.append_u16(4680);             // bin (c.f. specification 4.2.1)
×
254
  buf.append_u16(0);                // n_cigar
×
255
  buf.append_u16(flags_to_bam(settings.flags)); // flags
×
256

257
  buf.append_u32(record.length()); // l_seq
×
258
  buf.append_i32(-1);              // next_refID
×
259
  buf.append_i32(-1);              // next_pos
×
260
  buf.append_i32(0);               // tlen
×
261

262
  buf.append(name); // read_name + NUL terminator
×
263
  buf.append_u8(0);
×
264
  // no cigar operations
265
  sequence_to_bam(buf, record.sequence());
×
266
  qualities_to_bam(buf, record.qualities());
×
267

268
  if (sequences.has_read_group) {
×
269
    // RG:Z:${ID} tag
270
    buf.append("RGZ");
×
271
    buf.append(sequences.info.id());
×
272
    buf.append_u8(0); // NUL
×
273
  }
274

275
  const size_t block_size = buf.size() - block_size_pos - 4;
×
276
  buf.put_u32(block_size_pos, block_size); // block size (final)
×
277
}
278

279
///////////////////////////////////////////////////////////////////////////////
280
// Implementations for `serializer`
281

282
serializer::serializer(output_format format)
×
283
{
284
  switch (format) {
×
285
    case output_format::fastq:
×
286
    case output_format::fastq_gzip:
×
287
      m_header = fastq_serializer::header;
×
288
      m_record = fastq_serializer::record;
×
289
      break;
×
290
    case output_format::sam:
×
291
    case output_format::sam_gzip:
×
292
      m_header = sam_serializer::header;
×
293
      m_record = sam_serializer::record;
×
294
      break;
×
295
    case output_format::bam:
×
296
    case output_format::ubam:
×
297
      m_header = bam_serializer::header;
×
298
      m_record = bam_serializer::record;
×
299
      break;
×
300
    default:
×
301
      AR_FAIL("invalid output format");
×
302
  }
303
}
304

305
void
306
serializer::header(buffer& buf, const string_vec& args) const
×
307
{
308
  m_header(buf, args, m_sample);
×
309
}
310

311
void
312
serializer::record(buffer& buf,
×
313
                   const fastq& record,
314
                   const fastq_flags flags,
315
                   const size_t barcode) const
316
{
317
  m_record(
×
318
    buf,
319
    record,
320
    m_sample.at(barcode),
×
321
    serializer_settings{ flags, m_mate_separator, m_demultiplexing_only });
×
322
}
323

324
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc