• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #38

07 Aug 2024 07:47PM UTC coverage: 83.365% (-3.5%) from 86.839%
#38

push

travis-ci

MikkelSchubert
additional tests

2190 of 2627 relevant lines covered (83.37%)

15528.72 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

11.9
/src/serializer.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2024 by Mikkel Schubert - mikkelsch@gmail.com           *
5
 *                                                                       *
6
 * This program is free software: you can redistribute it and/or modify  *
7
 * it under the terms of the GNU General Public License as published by  *
8
 * the Free Software Foundation, either version 3 of the License, or     *
9
 * (at your option) any later version.                                   *
10
 *                                                                       *
11
 * This program is distributed in the hope that it will be useful,       *
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14
 * GNU General Public License for more details.                          *
15
 *                                                                       *
16
 * You should have received a copy of the GNU General Public License     *
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
18
\*************************************************************************/
19
#include "serializer.hpp"  // for fastq_serialzier
20
#include "buffer.hpp"      // for buffer
21
#include "commontypes.hpp" // for output_format
22
#include "debug.hpp"       // for AR_REQUIRE, AR_FAIL
23
#include "fastq.hpp"       // for fastq
24
#include "fastq_enc.hpp"   // for PHRED_OFFSET_MIN
25
#include "main.hpp"        // for VERSION
26
#include "strutils.hpp"    // for join_text
27
#include <string_view>     // for string_view
28

29
namespace adapterremoval {
30

31
class userconfig;
32

33
namespace {
34

35
//! Standard header for BAM files prior to compression
36
constexpr std::string_view BAM_HEADER{ "BAM\1", 4 };
37
//! Standard header for SAM/BAM files
38
constexpr std::string_view SAM_HEADER = "@HD\tVN:1.6\tSO:unsorted\n";
39

40
/**
41
 * Flags mapping onto SAM/BAM flags
42
 *
43
 * 0x1 = read paired
44
 * 0x4 = read unmapped
45
 * 0x8 = mate unmapped
46
 * 0x40 = mate 1
47
 * 0x80 = mate 2
48
 * 0x200 = failed QC
49
 */
50

51
std::string_view
52
flags_to_sam(fastq_flags flags)
×
53
{
54
  switch (flags) {
×
55
    case fastq_flags::se:
×
56
      return "4";
×
57
    case fastq_flags::se_fail:
×
58
      return "516";
×
59
    case fastq_flags::pe_1:
×
60
      return "77";
×
61
    case fastq_flags::pe_1_fail:
×
62
      return "589";
×
63
    case fastq_flags::pe_2:
×
64
      return "141";
×
65
    case fastq_flags::pe_2_fail:
×
66
      return "653";
×
67
    default:
×
68
      AR_FAIL("invalid fastq flags");
×
69
  }
70
}
71

72
uint16_t
73
flags_to_bam(fastq_flags flags)
×
74
{
75
  switch (flags) {
×
76
    case fastq_flags::se:
77
      return 4;
78
    case fastq_flags::se_fail:
79
      return 516;
80
    case fastq_flags::pe_1:
81
      return 77;
82
    case fastq_flags::pe_1_fail:
83
      return 589;
84
    case fastq_flags::pe_2:
85
      return 141;
86
    case fastq_flags::pe_2_fail:
87
      return 653;
88
    default:
×
89
      AR_FAIL("invalid fastq flags");
×
90
  }
91
}
92

93
void
94
sequence_to_bam(buffer& buf, const std::string& seq)
×
95
{
96
  const auto size = buf.size();
×
97

98
  uint8_t pair = 0;
×
99
  for (size_t i = 0; i < seq.length(); ++i) {
×
100
    pair = (pair << 4) | "\0\1\0\2\10\0\20\4"[seq[i] & 0x7];
×
101

102
    if (i % 2) {
×
103
      buf.append_u8(pair);
×
104
      pair = 0;
×
105
    }
106
  }
107

108
  if (seq.length() % 2) {
×
109
    buf.append_u8(pair << 4);
×
110
  }
111

112
  AR_REQUIRE(buf.size() - size == (seq.length() + 1) / 2);
×
113
}
114

115
void
116
qualities_to_bam(buffer& buf, const std::string& quals)
×
117
{
118
  for (const auto c : quals) {
×
119
    buf.append_u8(c - PHRED_OFFSET_MIN);
×
120
  }
121
}
122

123
void
124
write_fastq_record(buffer& buf, const fastq& record)
2✔
125
{
126
  buf.append_u8('@');
2✔
127
  buf.append(record.header());
6✔
128
  buf.append_u8('\n');
2✔
129
  buf.append(record.sequence());
6✔
130
  buf.append("\n+\n", 3);
2✔
131
  buf.append(record.qualities());
6✔
132
  buf.append_u8('\n');
2✔
133
}
2✔
134

135
std::string
136
create_sam_header(const string_vec& args)
×
137
{
138
  std::string header{ SAM_HEADER };
×
139
  header.append("@PG\tID:adapterremoval\tPN:adapterremoval\tCL:");
×
140
  header.append(join_text(args, " "));
×
141
  header.append("\tVN:");
×
142
  header.append(VERSION.substr(1)); // version without leading v
×
143
  header.append("\n");
×
144

145
  return header;
×
146
}
×
147

148
void
149
write_bam_header(buffer& buf, const string_vec& args)
×
150
{
151
  const auto sam_header = create_sam_header(args);
×
152

153
  buf.append(BAM_HEADER);            // magic
×
154
  buf.append_u32(sam_header.size()); // l_text
×
155
  buf.append(sam_header);            // terminating NUL not required
×
156
  buf.append_u32(0);                 // n_ref
×
157
}
158

159
void
160
write_sam_record(buffer& buf,
×
161
                 const fastq& record,
162
                 const fastq_flags flags,
163
                 char mate_separator)
164
{
165
  if (mate_separator) {
×
166
    switch (flags) {
×
167
      case fastq_flags::se:
×
168
      case fastq_flags::se_fail:
×
169
        mate_separator = '\0';
×
170
        break;
×
171
      case fastq_flags::pe_1:
172
      case fastq_flags::pe_1_fail:
173
      case fastq_flags::pe_2:
174
      case fastq_flags::pe_2_fail:
175
        break;
176
      default:
×
177
        AR_FAIL("invalid fastq flags");
×
178
    }
179
  }
180

181
  buf.append(record.name(mate_separator)); // 1. QNAME
×
182
  buf.append_u8('\t');
×
183
  buf.append(flags_to_sam(flags)); // 2. FLAG
×
184
  buf.append("\t"
×
185
             "*\t" // 3. RNAME
186
             "0\t" // 4. POS
187
             "0\t" // 5. MAPQ
188
             "*\t" // 6. CIGAR
189
             "*\t" // 7. RNEXT
190
             "0\t" // 8. PNEXT
191
             "0\t" // 9. TLEN
192
  );
193
  if (record.length()) {
×
194
    buf.append(record.sequence()); // 10. SEQ
×
195
    buf.append_u8('\t');
×
196
    buf.append(record.qualities()); // 11. QUAL
×
197
  } else {
198
    buf.append("*\t" // 10. SEQ
×
199
               "*"   // 11. QUAL
200
    );
201
  }
202

203
  buf.append("\tPG:Z:adapterremoval\n");
×
204
}
205

206
void
207
write_bam_record(buffer& buf,
×
208
                 const fastq& record,
209
                 const fastq_flags flags,
210
                 char mate_separator)
211
{
212
  const size_t block_size_pos = buf.size();
×
213
  buf.append_u32(0);  // block size (preliminary)
×
214
  buf.append_i32(-1); // refID
×
215
  buf.append_i32(-1); // pos
×
216

217
  const auto name = record.name(mate_separator).substr(0, 255);
×
218
  buf.append_u8(name.length() + 1);    // l_read_name
×
219
  buf.append_u8(0xFF);                 // mapq
×
220
  buf.append_u16(4680);                // bin (c.f. specification 4.2.1)
×
221
  buf.append_u16(0);                   // n_cigar
×
222
  buf.append_u16(flags_to_bam(flags)); // flags
×
223

224
  buf.append_u32(record.length()); // l_seq
×
225
  buf.append_i32(-1);              // next_refID
×
226
  buf.append_i32(-1);              // next_pos
×
227
  buf.append_i32(0);               // tlen
×
228

229
  buf.append(name); // read_name + NUL terminator
×
230
  buf.append_u8(0);
×
231
  // no cigar operations
232
  sequence_to_bam(buf, record.sequence());
×
233
  qualities_to_bam(buf, record.qualities());
×
234

235
  // PG:Z:adapterremoval tag
236
  buf.append("PGZadapterremoval");
×
237
  buf.append_u8(0); // NUL
×
238

239
  const size_t block_size = buf.size() - block_size_pos - 4;
×
240
  buf.put_u32(block_size_pos, block_size); // block size (final)
×
241
}
242

243
} // namespace
244

245
///////////////////////////////////////////////////////////////////////////////
246
// Implementations for `fastq_serializer`
247

248
void
249
fastq_serializer::header(buffer& buf,
×
250
                         const output_format format,
251
                         const string_vec& args)
252
{
253
  switch (format) {
×
254
    case output_format::fastq:
255
    case output_format::fastq_gzip:
256
      break;
257
    case output_format::sam:
×
258
    case output_format::sam_gzip:
×
259
      buf.append(create_sam_header(args));
×
260
      break;
×
261
    case output_format::bam:
×
262
    case output_format::ubam:
×
263
      write_bam_header(buf, args);
×
264
      break;
×
265
    default:
×
266
      AR_FAIL("invalid output format");
×
267
  }
268
}
269

270
void
271
fastq_serializer::record(buffer& buf,
2✔
272
                         const fastq& record,
273
                         const output_format format,
274
                         const fastq_flags flags,
275
                         const char mate_separator)
276
{
277
  switch (format) {
2✔
278
    case output_format::fastq:
2✔
279
    case output_format::fastq_gzip:
2✔
280
      return write_fastq_record(buf, record);
2✔
281
    case output_format::sam:
×
282
    case output_format::sam_gzip:
×
283
      return write_sam_record(buf, record, flags, mate_separator);
×
284
    case output_format::bam:
×
285
    case output_format::ubam:
×
286
      return write_bam_record(buf, record, flags, mate_separator);
×
287
    default:
×
288
      AR_FAIL("invalid output format");
×
289
  }
290
}
291

292
} // namespace adapterremoval
46✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc