• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #43

08 Sep 2024 08:47AM UTC coverage: 75.266% (-4.5%) from 79.763%
#43

push

travis-ci

MikkelSchubert
minor improvements to assert signatures and silence lints

1 of 1 new or added line in 1 file covered. (100.0%)

75 existing lines in 3 files now uncovered.

2404 of 3194 relevant lines covered (75.27%)

12788.78 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

6.77
/src/serializer.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2024 by Mikkel Schubert - mikkelsch@gmail.com           *
5
 *                                                                       *
6
 * This program is free software: you can redistribute it and/or modify  *
7
 * it under the terms of the GNU General Public License as published by  *
8
 * the Free Software Foundation, either version 3 of the License, or     *
9
 * (at your option) any later version.                                   *
10
 *                                                                       *
11
 * This program is distributed in the hope that it will be useful,       *
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14
 * GNU General Public License for more details.                          *
15
 *                                                                       *
16
 * You should have received a copy of the GNU General Public License     *
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
18
\*************************************************************************/
19
#include "serializer.hpp"  // for fastq_serializer
20
#include "buffer.hpp"      // for buffer
21
#include "commontypes.hpp" // for output_format
22
#include "debug.hpp"       // for AR_REQUIRE, AR_FAIL
23
#include "fastq.hpp"       // for fastq
24
#include "fastq_enc.hpp"   // for PHRED_OFFSET_MIN
25
#include "main.hpp"        // for VERSION
26
#include "strutils.hpp"    // for join_text
27
#include <string_view>     // for string_view
28

29
namespace adapterremoval {
30

31
class userconfig;
32

33
namespace {
34

35
//! Standard header for BAM files prior to compression
36
constexpr std::string_view BAM_HEADER{ "BAM\1", 4 };
37
//! Standard header for SAM/BAM files
38
constexpr std::string_view SAM_HEADER = "@HD\tVN:1.6\tSO:unsorted\n";
39

40
/**
41
 * Flags mapping onto SAM/BAM flags
42
 *
43
 * 0x1 = read paired
44
 * 0x4 = read unmapped
45
 * 0x8 = mate unmapped
46
 * 0x40 = mate 1
47
 * 0x80 = mate 2
48
 * 0x200 = failed QC
49
 */
50

51
std::string_view
52
flags_to_sam(fastq_flags flags)
×
53
{
54
  switch (flags) {
×
55
    case fastq_flags::se:
×
56
      return "4";
×
57
    case fastq_flags::se_fail:
×
58
      return "516";
×
59
    case fastq_flags::pe_1:
×
60
      return "77";
×
61
    case fastq_flags::pe_1_fail:
×
62
      return "589";
×
63
    case fastq_flags::pe_2:
×
64
      return "141";
×
65
    case fastq_flags::pe_2_fail:
×
66
      return "653";
×
67
    default:
×
68
      AR_FAIL("invalid fastq flags");
×
69
  }
70
}
71

72
uint16_t
73
flags_to_bam(fastq_flags flags)
×
74
{
75
  switch (flags) {
×
76
    case fastq_flags::se:
77
      return 4;
78
    case fastq_flags::se_fail:
79
      return 516;
80
    case fastq_flags::pe_1:
81
      return 77;
82
    case fastq_flags::pe_1_fail:
83
      return 589;
84
    case fastq_flags::pe_2:
85
      return 141;
86
    case fastq_flags::pe_2_fail:
87
      return 653;
88
    default:
×
89
      AR_FAIL("invalid fastq flags");
×
90
  }
91
}
92

93
void
94
sequence_to_bam(buffer& buf, const std::string& seq)
×
95
{
96
  const auto size = buf.size();
×
97

98
  uint8_t pair = 0;
×
99
  for (size_t i = 0; i < seq.length(); ++i) {
×
100
    pair = (pair << 4) | "\0\1\0\2\10\0\20\4"[seq[i] & 0x7];
×
101

102
    if (i % 2) {
×
103
      buf.append_u8(pair);
×
104
      pair = 0;
×
105
    }
106
  }
107

108
  if (seq.length() % 2) {
×
109
    buf.append_u8(pair << 4);
×
110
  }
111

112
  AR_REQUIRE(buf.size() - size == (seq.length() + 1) / 2);
×
113
}
114

115
void
116
qualities_to_bam(buffer& buf, const std::string& quals)
×
117
{
118
  for (const auto c : quals) {
×
119
    buf.append_u8(c - PHRED_OFFSET_MIN);
×
120
  }
121
}
122

123
std::string
124
create_sam_header(const string_vec& args, const sample& s)
×
125
{
126
  std::string header{ SAM_HEADER };
×
127

128
  // @RG
129
  for (const auto& it : s) {
×
130
    header.append(it.info.header());
×
UNCOV
131
    header.append("\n");
×
132
  }
133

134
  // @PG
135
  header.append("@PG\tID:adapterremoval\tPN:adapterremoval\tCL:");
×
136
  header.append(join_text(args, " "));
×
137
  header.append("\tVN:");
×
UNCOV
138
  header.append(VERSION.substr(1)); // version without leading v
×
139
  header.append("\n");
×
140

UNCOV
141
  return header;
×
UNCOV
142
}
×
143

144
} // namespace
145

146
///////////////////////////////////////////////////////////////////////////////
147
// Implementations for `fastq_serializer`
148

149
void
150
fastq_serializer::header(buffer& /* buf */,
×
151
                         const string_vec& /* args */,
152
                         const sample& /* s */)
153
{
154
}
155

156
void
157
fastq_serializer::record(buffer& buf,
2✔
158
                         const fastq& record,
159
                         const sample_sequences& sequences,
160
                         const serializer_settings& settings)
161
{
162
  buf.append(record.header());
6✔
163
  if (settings.demultiplexing_only) {
2✔
164
    buf.append(" BC:");
×
UNCOV
165
    buf.append(sequences.barcode_1);
×
UNCOV
166
    if (sequences.barcode_2.length()) {
×
UNCOV
167
      buf.append_u8('-');
×
168
      buf.append(sequences.barcode_2);
×
169
    }
170
  }
171
  buf.append_u8('\n');
2✔
172
  buf.append(record.sequence());
6✔
173
  buf.append("\n+\n", 3);
2✔
174
  buf.append(record.qualities());
6✔
175
  buf.append_u8('\n');
2✔
176
}
2✔
177

178
///////////////////////////////////////////////////////////////////////////////
179
// Implementations for `sam_serializer`
180

181
void
182
sam_serializer::header(buffer& buf, const string_vec& args, const sample& s)
×
183
{
184
  buf.append(create_sam_header(args, s));
×
185
}
186

187
void
UNCOV
188
sam_serializer::record(buffer& buf,
×
189
                       const fastq& record,
190
                       const sample_sequences& sequences,
191
                       const serializer_settings& settings)
192
{
UNCOV
193
  buf.append(record.name(settings.mate_separator)); // 1. QNAME
×
UNCOV
194
  buf.append_u8('\t');
×
UNCOV
195
  buf.append(flags_to_sam(settings.flags)); // 2. FLAG
×
UNCOV
196
  buf.append("\t"
×
197
             "*\t" // 3. RNAME
198
             "0\t" // 4. POS
199
             "0\t" // 5. MAPQ
200
             "*\t" // 6. CIGAR
201
             "*\t" // 7. RNEXT
202
             "0\t" // 8. PNEXT
203
             "0\t" // 9. TLEN
204
  );
205
  if (record.length()) {
×
206
    buf.append(record.sequence()); // 10. SEQ
×
UNCOV
207
    buf.append_u8('\t');
×
UNCOV
208
    buf.append(record.qualities()); // 11. QUAL
×
209
  } else {
UNCOV
210
    buf.append("*\t" // 10. SEQ
×
211
               "*"   // 11. QUAL
212
    );
213
  }
214

215
  buf.append("\tRG:Z:");
×
UNCOV
216
  buf.append(sequences.info.id());
×
UNCOV
217
  buf.append("\tPG:Z:adapterremoval\n");
×
218
}
219

220
///////////////////////////////////////////////////////////////////////////////
221
// Implementations for `bam_serializer`
222

223
void
224
bam_serializer::header(buffer& buf, const string_vec& args, const sample& s)
×
225
{
226
  const auto sam_header = create_sam_header(args, s);
×
227

228
  buf.append(BAM_HEADER);            // magic
×
229
  buf.append_u32(sam_header.size()); // l_text
×
230
  buf.append(sam_header);            // terminating NUL not required
×
UNCOV
231
  buf.append_u32(0);                 // n_ref
×
232
}
233

234
void
UNCOV
235
bam_serializer::record(buffer& buf,
×
236
                       const fastq& record,
237
                       const sample_sequences& sequences,
238
                       const serializer_settings& settings)
239
{
UNCOV
240
  const size_t block_size_pos = buf.size();
×
UNCOV
241
  buf.append_u32(0);  // block size (preliminary)
×
242
  buf.append_i32(-1); // refID
×
243
  buf.append_i32(-1); // pos
×
244

UNCOV
245
  const auto name = record.name(settings.mate_separator).substr(0, 255);
×
UNCOV
246
  buf.append_u8(name.length() + 1); // l_read_name
×
247
  buf.append_u8(0);                 // mapq
×
248
  buf.append_u16(4680);             // bin (c.f. specification 4.2.1)
×
249
  buf.append_u16(0);                // n_cigar
×
UNCOV
250
  buf.append_u16(flags_to_bam(settings.flags)); // flags
×
251

252
  buf.append_u32(record.length()); // l_seq
×
253
  buf.append_i32(-1);              // next_refID
×
UNCOV
254
  buf.append_i32(-1);              // next_pos
×
UNCOV
255
  buf.append_i32(0);               // tlen
×
256

UNCOV
257
  buf.append(name); // read_name + NUL terminator
×
UNCOV
258
  buf.append_u8(0);
×
259
  // no cigar operations
260
  sequence_to_bam(buf, record.sequence());
×
UNCOV
261
  qualities_to_bam(buf, record.qualities());
×
262

263
  // PG:Z:adapterremoval tag
UNCOV
264
  buf.append("RGZ");
×
UNCOV
265
  buf.append(sequences.info.id());
×
UNCOV
266
  buf.append_u8(0); // NUL
×
267

268
  // PG:Z:adapterremoval tag
UNCOV
269
  buf.append("PGZadapterremoval");
×
UNCOV
270
  buf.append_u8(0); // NUL
×
271

UNCOV
272
  const size_t block_size = buf.size() - block_size_pos - 4;
×
UNCOV
273
  buf.put_u32(block_size_pos, block_size); // block size (final)
×
274
}
275

276
///////////////////////////////////////////////////////////////////////////////
277
// Implementations for `serializer`
278

UNCOV
279
/**
 * Selects the header and record writers that match `format`; fails via
 * AR_FAIL for any format not handled below.
 */
serializer::serializer(output_format format)
{
  if (format == output_format::fastq || format == output_format::fastq_gzip) {
    m_header = fastq_serializer::header;
    m_record = fastq_serializer::record;
  } else if (format == output_format::sam ||
             format == output_format::sam_gzip) {
    m_header = sam_serializer::header;
    m_record = sam_serializer::record;
  } else if (format == output_format::bam || format == output_format::ubam) {
    m_header = bam_serializer::header;
    m_record = bam_serializer::record;
  } else {
    AR_FAIL("invalid output format");
  }
}
301

302
void
UNCOV
303
serializer::header(buffer& buf, const string_vec& args) const
×
304
{
UNCOV
305
  m_header(buf, args, m_sample);
×
306
}
307

308
void
UNCOV
309
serializer::record(buffer& buf,
×
310
                   const fastq& record,
311
                   const fastq_flags flags,
312
                   const size_t barcode) const
313
{
314
  m_record(
×
315
    buf,
316
    record,
UNCOV
317
    m_sample.at(barcode),
×
UNCOV
318
    serializer_settings{ flags, m_mate_separator, m_demultiplexing_only });
×
319
}
320

321
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc