• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #38

07 Aug 2024 07:47PM UTC coverage: 83.365% (-3.5%) from 86.839%
#38

push

travis-ci

MikkelSchubert
additional tests

2190 of 2627 relevant lines covered (83.37%)

15528.72 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.89
/src/fastq.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2011 by Stinus Lindgreen - stinus@binf.ku.dk            *
5
 * Copyright (C) 2014 by Mikkel Schubert - mikkelsch@gmail.com           *
6
 *                                                                       *
7
 * This program is free software: you can redistribute it and/or modify  *
8
 * it under the terms of the GNU General Public License as published by  *
9
 * the Free Software Foundation, either version 3 of the License, or     *
10
 * (at your option) any later version.                                   *
11
 *                                                                       *
12
 * This program is distributed in the hope that it will be useful,       *
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
15
 * GNU General Public License for more details.                          *
16
 *                                                                       *
17
 * You should have received a copy of the GNU General Public License     *
18
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
19
\*************************************************************************/
20
#include "fastq.hpp"
21
#include "debug.hpp"      // for AR_REQUIRE, AR_FAIL
22
#include "errors.hpp"     // for fastq_error
23
#include "linereader.hpp" // for line_reader_base
24
#include <algorithm>      // for reverse, count, max, min
25
#include <cmath>          // for log10, pow
26
#include <numeric>        // for accumulate
27
#include <sstream>        // for ostringstream
28
#include <string_view>    // for string_view
29

30
namespace adapterremoval {
31

32
namespace {
33

34
std::vector<double>
35
init_phred_to_p_values()
1✔
36
{
37
  std::vector<double> result;
1✔
38
  for (size_t i = PHRED_SCORE_MIN; i <= PHRED_SCORE_MAX; ++i) {
95✔
39
    result.push_back(std::pow(10.0, static_cast<double>(i) / -10.0));
188✔
40
  }
41

42
  return result;
1✔
43
}
×
44

45
const std::vector<double> g_phred_to_p = init_phred_to_p_values();
46

47
enum class read_mate
48
{
49
  unknown,
50
  mate_1,
51
  mate_2,
52
};
53

54
struct mate_info
55
{
56
  std::string_view desc() const
12✔
57
  {
58
    switch (mate) {
12✔
59
      case read_mate::unknown:
4✔
60
        return "unknown";
8✔
61
      case read_mate::mate_1:
5✔
62
        return "mate 1";
10✔
63
      case read_mate::mate_2:
3✔
64
        return "mate 2";
6✔
65
      default:
×
66
        AR_FAIL("Invalid mate in mate_info::desc");
×
67
    }
68
  }
69

70
  //! Read name without mate number or meta-data
71
  std::string_view name{};
72
  //! Which mate in a pair, if identified
73
  read_mate mate = read_mate::unknown;
74
  //! Position of the separator character in the header (if any)
75
  size_t sep_pos = std::string::npos;
76
};
77

78
mate_info
79
get_mate_info(const fastq& read, char mate_separator)
80✔
80
{
81
  const std::string_view header = read.header();
160✔
82

83
  size_t pos = header.find_first_of(' ');
80✔
84
  if (pos == std::string::npos) {
80✔
85
    pos = header.length();
78✔
86
  }
87

88
  mate_info info;
80✔
89
  if (pos >= 2 && header.at(pos - 2) == mate_separator) {
160✔
90
    const char digit = header.at(pos - 1);
92✔
91

92
    if (digit == '1') {
46✔
93
      info.mate = read_mate::mate_1;
22✔
94
      pos -= 2;
22✔
95
      info.sep_pos = pos;
22✔
96
    } else if (digit == '2') {
24✔
97
      info.mate = read_mate::mate_2;
15✔
98
      pos -= 2;
15✔
99
      info.sep_pos = pos;
15✔
100
    }
101
  }
102

103
  info.name = header.substr(0, pos);
80✔
104
  return info;
80✔
105
}
106

107
size_t
108
count_poly_x_tail(const std::string& m_sequence,
75✔
109
                  const char nucleotide,
110
                  const size_t min_length)
111
{
112
  // Maximum number of sequential mismatches
113
  const size_t max_seq_mismatches = 2;
75✔
114
  // Number of called bases required per mismatch (via fastp)
115
  const size_t min_bases_per_mismatch = 8;
75✔
116

117
  //! Number of bases in the alignment to trim, excluding leading mismatches
118
  size_t n_trim = 0;
75✔
119
  //! Number of bases in the alignment
120
  size_t n_bases = 0;
75✔
121
  //! Number of uncalled bases (Ns) in the alignment
122
  size_t n_uncalled = 0;
75✔
123
  //! Number of mismatches in the alignment
124
  size_t n_mismatches = 0;
75✔
125
  //! Current number of sequential mismatches in the alignment
126
  size_t n_seq_mismatches = 0;
75✔
127

128
  for (auto it = m_sequence.rbegin(); it != m_sequence.rend(); ++it) {
1,852✔
129
    n_bases++;
440✔
130

131
    if (*it == nucleotide) {
880✔
132
      n_trim = n_bases;
133
      n_seq_mismatches = 0;
134
    } else if (*it == 'N') {
240✔
135
      n_uncalled++;
14✔
136
      // Trailing Ns are allowed only after a match
137
      if (!n_seq_mismatches) {
14✔
138
        n_trim = n_bases;
12✔
139
      }
140
    } else {
141
      n_mismatches++;
106✔
142
      n_seq_mismatches++;
106✔
143
      if (n_seq_mismatches > max_seq_mismatches ||
212✔
144
          n_mismatches > std::max(min_length, n_bases - n_uncalled) /
264✔
145
                           min_bases_per_mismatch) {
146
        // The final mismatch is not counted as part of the alignment
147
        n_bases--;
148
        break;
149
      }
150
    }
151
  }
152

153
  if (n_bases - n_uncalled >= min_length) {
75✔
154
    return n_trim;
28✔
155
  }
156

157
  return 0;
158
}
159

160
} // namespace
161

162
///////////////////////////////////////////////////////////////////////////////
163
// fastq
164

165
fastq::fastq()
147✔
166
  : m_header()
147✔
167
  , m_sequence()
147✔
168
  , m_qualities()
147✔
169
{
170
}
147✔
171

172
fastq::fastq(std::string header,
728✔
173
             std::string sequence,
174
             std::string qualities,
175
             const fastq_encoding& encoding)
728✔
176
  : m_header(std::move(header))
748✔
177
  , m_sequence(std::move(sequence))
748✔
178
  , m_qualities(std::move(qualities))
748✔
179
{
180
  if (m_qualities.length() != m_sequence.length()) {
2,184✔
181
    throw fastq_error(
12✔
182
      "invalid FASTQ record; sequence/quality length does not match");
8✔
183
  }
184

185
  post_process(encoding);
724✔
186
}
808✔
187

188
fastq::fastq(std::string header, std::string sequence)
474✔
189
  : m_header(std::move(header))
474✔
190
  , m_sequence(std::move(sequence))
474✔
191
  , m_qualities(std::string(m_sequence.length(), '!'))
1,422✔
192
{
193
  post_process(FASTQ_ENCODING_33);
474✔
194
}
474✔
195

196
bool
197
fastq::operator==(const fastq& other) const
238✔
198
{
199
  return (m_header == other.m_header) && (m_sequence == other.m_sequence) &&
476✔
200
         (m_qualities == other.m_qualities);
238✔
201
}
202

203
std::string_view
204
fastq::name(const char mate_separator) const
24✔
205
{
206
  std::string_view header = m_header;
24✔
207
  const size_t pos = header.find_first_of(' ');
24✔
208
  if (pos != std::string::npos) {
24✔
209
    header = header.substr(0, pos);
2✔
210
  }
211

212
  if (mate_separator && header.size() > 1 &&
30✔
213
      (header.back() == '1' || header.back() == '2') &&
34✔
214
      header.at(header.length() - 2) == mate_separator) {
8✔
215
    header = header.substr(0, header.length() - 2);
2✔
216
  }
217

218
  return header;
24✔
219
}
220

221
size_t
222
fastq::count_ns() const
6✔
223
{
224
  return static_cast<size_t>(
6✔
225
    std::count(m_sequence.begin(), m_sequence.end(), 'N'));
24✔
226
}
227

228
namespace {
229

230
/**
231
 * Calculate the absolute sequence complexity score, under the assumption that
232
 * the sequence does not contain Ns. Should the sequence contain Ns, then this
233
 * algorithm would overestimate the sequence complexity, and therefore returns
234
 * -1 to indicate failure.
235
 */
236
int
237
fast_calculate_complexity(const std::string& sequence)
14✔
238
{
239
  // The last base is not checked in the loop below
240
  if (sequence.back() == 'N') {
28✔
241
    return -1;
242
  }
243

244
  const size_t length = sequence.length() - 1;
10✔
245
  size_t i = 0;
10✔
246
  int score = 0;
10✔
247

248
  // Fixed block sizes allows gcc/clang to optimize the loop
249
  const size_t BLOCK_SIZE = 16;
10✔
250
  for (; i + BLOCK_SIZE < length; i += BLOCK_SIZE) {
10✔
251
    for (size_t j = 0; j < BLOCK_SIZE; ++j, ++i) {
×
252
      if (sequence[i] != sequence[i + 1]) {
×
253
        score++;
×
254
      }
255

256
      if (sequence[i] == 'N') {
×
257
        return -1;
258
      }
259
    }
260
  }
261

262
  for (; i < length; ++i) {
35✔
263
    if (sequence[i] != sequence[i + 1]) {
84✔
264
      score++;
21✔
265
    }
266

267
    if (sequence[i] == 'N') {
56✔
268
      return -1;
269
    }
270
  }
271

272
  return score;
273
}
274

275
} // namespace
276

277
double
278
fastq::complexity() const
17✔
279
{
280
  if (m_sequence.length() < 2) {
34✔
281
    return 0.0;
282
  }
283

284
  // Try to use unrolled/vectorized algorithm
285
  int score = fast_calculate_complexity(m_sequence);
14✔
286

287
  if (score < 0) {
14✔
288
    // If the sequence contains Ns then use the slower calculation, that does
289
    // not treat Ns as distinct bases and thereby does not inflate the score
290
    char prev = 'N';
7✔
291
    for (const auto nuc : m_sequence) {
140✔
292
      if (nuc != 'N' && nuc != prev) {
28✔
293
        prev = nuc;
14✔
294
        score++;
14✔
295
      }
296
    }
297
  }
298

299
  return std::max(0.0, score / static_cast<double>(m_sequence.length() - 1));
42✔
300
}
301

302
fastq::ntrimmed
303
fastq::trim_trailing_bases(const bool trim_ns,
9✔
304
                           char low_quality,
305
                           const bool preserve5p)
306
{
307
  low_quality += PHRED_OFFSET_MIN;
9✔
308
  auto is_quality_base = [&](size_t i) {
9✔
309
    return m_qualities.at(i) > low_quality &&
69✔
310
           (!trim_ns || m_sequence.at(i) != 'N');
38✔
311
  };
9✔
312

313
  size_t right_exclusive = 0;
9✔
314
  for (size_t i = m_sequence.length(); i; --i) {
30✔
315
    if (is_quality_base(i - 1)) {
19✔
316
      right_exclusive = i;
317
      break;
318
    }
319
  }
320

321
  size_t left_inclusive = 0;
9✔
322
  for (size_t i = 0; !preserve5p && i < right_exclusive; ++i) {
14✔
323
    if (is_quality_base(i)) {
10✔
324
      left_inclusive = i;
325
      break;
326
    }
327
  }
328

329
  return trim_sequence_and_qualities(left_inclusive, right_exclusive);
18✔
330
}
331

332
//! Calculates the size of the sliding window for quality trimming given a
333
//! read length and a user-defined window-size (fraction or whole number).
334
size_t
335
calculate_winlen(const size_t read_length, double window_size)
48✔
336
{
337
  if (window_size < 1.0) {
×
338
    window_size = window_size * static_cast<double>(read_length);
10✔
339
  }
340

341
  const auto winlen = static_cast<size_t>(window_size);
48✔
342
  if (winlen == 0 || winlen > read_length) {
48✔
343
    return read_length;
10✔
344
  }
345

346
  return winlen;
347
}
348

349
fastq::ntrimmed
350
fastq::trim_windowed_bases(const bool trim_ns,
53✔
351
                           char low_quality,
352
                           const double window_size,
353
                           const bool preserve5p)
354
{
355
  AR_REQUIRE(window_size >= 0.0);
61✔
356
  if (m_sequence.empty()) {
102✔
357
    return {};
6✔
358
  }
359

360
  low_quality += PHRED_OFFSET_MIN;
48✔
361
  auto is_quality_base = [&](size_t i) {
48✔
362
    return m_qualities.at(i) > low_quality &&
593✔
363
           (!trim_ns || m_sequence.at(i) != 'N');
266✔
364
  };
48✔
365

366
  const size_t winlen = calculate_winlen(length(), window_size);
96✔
367
  long running_sum =
48✔
368
    std::accumulate(m_qualities.begin(), m_qualities.begin() + winlen, 0);
192✔
369

370
  size_t left_inclusive = std::string::npos;
48✔
371
  size_t right_exclusive = std::string::npos;
48✔
372
  for (size_t offset = 0; offset + winlen <= length(); ++offset) {
538✔
373
    const long running_avg = running_sum / static_cast<long>(winlen);
261✔
374

375
    // We trim away low quality bases and Ns from the start of reads,
376
    // **before** we consider windows.
377
    if (left_inclusive == std::string::npos && is_quality_base(offset) &&
261✔
378
        running_avg > low_quality) {
42✔
379
      left_inclusive = offset;
380
    }
381

382
    if (left_inclusive != std::string::npos &&
261✔
383
        (running_avg <= low_quality || offset + winlen == length())) {
362✔
384
      right_exclusive = offset;
385
      while (right_exclusive < length() && is_quality_base(right_exclusive)) {
366✔
386
        right_exclusive++;
143✔
387
      }
388

389
      break;
390
    }
391

392
    running_sum -= m_qualities.at(offset);
442✔
393
    if (offset + winlen < length()) {
442✔
394
      running_sum += m_qualities.at(offset + winlen);
426✔
395
    }
396
  }
397

398
  if (left_inclusive == std::string::npos) {
48✔
399
    // No starting window found. Trim all bases starting from start.
400
    return trim_sequence_and_qualities(length(), length());
24✔
401
  } else if (preserve5p) {
40✔
402
    left_inclusive = 0;
3✔
403
  }
404

405
  AR_REQUIRE(right_exclusive != std::string::npos);
40✔
406
  return trim_sequence_and_qualities(left_inclusive, right_exclusive);
40✔
407
}
408

409
fastq::ntrimmed
410
fastq::mott_trimming(const double error_limit, const bool preserve5p)
7✔
411
{
412
  AR_REQUIRE(error_limit >= 0 && error_limit <= 1);
7✔
413

414
  size_t left_inclusive_temp = 0;
415
  size_t left_inclusive = 0;
416
  size_t right_exclusive = 0;
417

418
  double error_sum = 0.0;
419
  double error_sum_max = 0.0;
420

421
  for (size_t i = 0; i < length(); i++) {
190✔
422
    char phred = m_qualities.at(i) - PHRED_OFFSET_MIN;
176✔
423

424
    // Reduce weighting of very low-quality bases (inspired by seqtk) and
425
    // normalize Ns. The latter is not expected to matter for most data, but may
426
    // be relevant for some old/weird data and masked FASTQ reads.
427
    if (phred < 3 || m_sequence.at(i) == 'N') {
132✔
428
      phred = 3;
429
    }
430

431
    error_sum += error_limit - g_phred_to_p.at(phred);
176✔
432

433
    if (error_sum < 0.0) {
88✔
434
      // End of current segment (if any)
435
      left_inclusive_temp = i + 1;
52✔
436
      error_sum = 0;
52✔
437
    } else if (error_sum > error_sum_max) {
36✔
438
      // Extend best segment, possibly replacing the previous candidate
439
      left_inclusive = left_inclusive_temp;
28✔
440
      right_exclusive = i + 1;
28✔
441
      error_sum_max = error_sum;
28✔
442
    }
443
  }
444

445
  return trim_sequence_and_qualities(preserve5p ? 0 : left_inclusive,
13✔
446
                                     right_exclusive);
7✔
447
}
448

449
std::pair<char, size_t>
450
fastq::poly_x_trimming(const std::string& nucleotides, size_t min_length)
51✔
451
{
452
  size_t best_count = 0;
51✔
453
  char best_nucleotide = 'N';
51✔
454
  if (m_sequence.length() >= min_length && !nucleotides.empty()) {
146✔
455
    // Looping over all nucleotides ended up faster than a single pass algorithm
456
    for (const auto nucleotide : nucleotides) {
472✔
457
      const auto count = count_poly_x_tail(m_sequence, nucleotide, min_length);
75✔
458
      if (count > best_count) {
75✔
459
        best_nucleotide = nucleotide;
28✔
460
        best_count = count;
28✔
461
      }
462
    }
463

464
    truncate(0, length() - best_count);
86✔
465
  }
466

467
  return { best_nucleotide, best_count };
153✔
468
}
469

470
void
471
fastq::truncate(size_t pos, size_t len)
248✔
472
{
473
  AR_REQUIRE(pos == 0 || pos <= length());
303✔
474

475
  if (pos) {
247✔
476
    m_sequence.erase(0, pos);
50✔
477
    m_qualities.erase(0, pos);
50✔
478
  }
479

480
  if (len < length()) {
494✔
481
    m_sequence.erase(len);
128✔
482
    m_qualities.erase(len);
128✔
483
  }
484
}
247✔
485

486
void
487
fastq::reverse_complement()
2✔
488
{
489
  std::reverse(m_sequence.begin(), m_sequence.end());
6✔
490
  std::reverse(m_qualities.begin(), m_qualities.end());
6✔
491

492
  // Lookup table for complementary bases based only on the last 4 bits
493
  static const char complements[] = "-T-GA--C------N-";
2✔
494
  for (auto& nuc : m_sequence) {
38✔
495
    nuc = complements[nuc & 0xf];
10✔
496
  }
497
}
2✔
498

499
void
500
fastq::add_prefix_to_name(const std::string& prefix)
3✔
501
{
502
  if (!prefix.empty()) {
6✔
503
    m_header.insert(0, prefix);
2✔
504
  }
505
}
3✔
506

507
bool
508
fastq::read(line_reader_base& reader, const fastq_encoding& encoding)
28✔
509
{
510
  if (read_unsafe(reader)) {
28✔
511
    post_process(encoding);
10✔
512
    return true;
10✔
513
  }
514

515
  return false;
516
}
517

518
bool
519
fastq::read_unsafe(line_reader_base& reader)
28✔
520
{
521
  do {
35✔
522
    if (!reader.getline(m_header)) {
35✔
523
      // End of file; terminate gracefully
524
      return false;
525
    }
526
  } while (m_header.empty());
58✔
527

528
  if (m_header.size() < 2 || m_header.at(0) != '@') {
65✔
529
    throw fastq_error("Malformed or empty FASTQ header");
3✔
530
  } else {
531
    // FIXME: Erasing the '@' and then re-adding it later is pointless work
532
    m_header.erase(0, 1);
21✔
533
  }
534

535
  if (!reader.getline(m_sequence)) {
21✔
536
    throw fastq_error("partial FASTQ record; cut off after header");
3✔
537
  } else if (m_sequence.empty()) {
40✔
538
    throw fastq_error("sequence is empty");
6✔
539
  }
540

541
  // Most of the time this will only be '+' and not require an allocation
542
  std::string line;
18✔
543
  if (!reader.getline(line)) {
18✔
544
    throw fastq_error("partial FASTQ record; cut off after sequence");
3✔
545
  } else if (line.empty() || line.at(0) != '+') {
50✔
546
    throw fastq_error("FASTQ record lacks separator character (+)");
3✔
547
  }
548

549
  if (!reader.getline(m_qualities)) {
16✔
550
    throw fastq_error("partial FASTQ record; cut off after separator");
6✔
551
  } else if (m_qualities.length() != m_sequence.length()) {
42✔
552
    throw fastq_error("sequence/quality lengths do not match");
12✔
553
  }
554

555
  return true;
10✔
556
}
24✔
557

558
///////////////////////////////////////////////////////////////////////////////
559
// Public helper functions
560

561
char
562
fastq::p_to_phred_33(double p)
×
563
{
564
  // Lowest possible error rate representable is '~' (~5e-10)
565
  const auto min_p = std::max(5e-10, p);
×
566
  const auto raw_score = static_cast<int>(-10.0 * std::log10(min_p));
×
567

568
  return std::min<int>(PHRED_OFFSET_MAX, PHRED_OFFSET_MIN + raw_score);
×
569
}
570

571
char
572
fastq::guess_mate_separator(const std::vector<fastq>& reads_1,
12✔
573
                            const std::vector<fastq>& reads_2)
574
{
575
  AR_REQUIRE(reads_1.size() == reads_2.size());
44✔
576

577
  // Commonly used characters
578
  const std::string candidates = "/.:";
20✔
579

580
  for (auto candidate : candidates) {
95✔
581
    auto it_1 = reads_1.begin();
19✔
582
    auto it_2 = reads_2.begin();
19✔
583

584
    bool any_failures = false;
19✔
585
    while (it_1 != reads_1.end()) {
75✔
586
      const auto info1 = get_mate_info(*it_1++, candidate);
60✔
587
      const auto info2 = get_mate_info(*it_2++, candidate);
60✔
588

589
      if (info1.name != info2.name) {
20✔
590
        any_failures = true;
12✔
591
        break;
12✔
592
      }
593

594
      const auto mate_1 = info1.mate;
8✔
595
      const auto mate_2 = info2.mate;
8✔
596

597
      if (mate_1 != read_mate::unknown || mate_2 != read_mate::unknown) {
8✔
598
        if (mate_1 == mate_2) {
7✔
599
          // This could be valid data that just happens to include a known
600
          // mate separator in the name. But this could also happen if the
601
          // same reads are used for both mate 1 and mate 2, so we cannot
602
          // safely guess.
603
          return 0;
2✔
604
        } else if (mate_1 != read_mate::mate_1 || mate_2 != read_mate::mate_2) {
6✔
605
          // The mate separator seems to be correct, but the mate information
606
          // does not match: One mate is missing information or the order is
607
          // wrong. Return the identified separator and raise an error later.
608
          return candidate;
1✔
609
        }
610
      }
611
    }
612

613
    if (!any_failures) {
12✔
614
      return candidate;
5✔
615
    }
616
  }
617

618
  return 0;
3✔
619
}
10✔
620

621
void
622
fastq::normalize_paired_reads(fastq& mate1, fastq& mate2, char mate_separator)
23✔
623
{
624
  if (mate1.length() == 0 || mate2.length() == 0) {
67✔
625
    throw fastq_error("Pair contains empty reads");
9✔
626
  }
627

628
  const auto info1 = get_mate_info(mate1, mate_separator);
20✔
629
  const auto info2 = get_mate_info(mate2, mate_separator);
20✔
630

631
  if (info1.name != info2.name) {
20✔
632
    std::ostringstream error;
9✔
633
    error << "Pair contains reads with mismatching names:\n"
9✔
634
          << " - '" << info1.name << "'\n"
635
          << " - '" << info2.name << "'";
27✔
636

637
    if (info1.mate == read_mate::unknown || info2.mate == read_mate::unknown) {
9✔
638
      error << "\n\nNote that AdapterRemoval by determines the mate "
8✔
639
               "numbers as the digit found at the end of the read name, "
640
               "if this is preceded by";
8✔
641

642
      if (mate_separator) {
8✔
643
        error << "the character '" << mate_separator << "'";
8✔
644
      } else {
645
        error << "a character such as '/'";
×
646
      }
647

648
      error << "; if these data makes use of a different character to "
8✔
649
               "separate the mate number from the read name, then you "
650
               "will need to set the --mate-separator command-line "
651
               "option to the appropriate character.";
8✔
652
    }
653

654
    throw fastq_error(error.str());
36✔
655
  }
9✔
656

657
  if (info1.mate != read_mate::unknown || info2.mate != read_mate::unknown) {
11✔
658
    if (info1.mate != read_mate::mate_1 || info2.mate != read_mate::mate_2) {
9✔
659
      std::ostringstream error;
6✔
660
      error << "Inconsistent mate numbering; please verify data:\n"
6✔
661
            << "\nRead 1 identified as " << info1.desc() << ": " << mate1.name()
6✔
662
            << "\nRead 2 identified as " << info2.desc() << ": "
663
            << mate2.name();
24✔
664

665
      throw fastq_error(error.str());
24✔
666
    }
6✔
667

668
    AR_REQUIRE(info1.sep_pos == info2.sep_pos);
3✔
669
    mate1.m_header.at(info1.sep_pos) = MATE_SEPARATOR;
6✔
670
    mate2.m_header.at(info2.sep_pos) = MATE_SEPARATOR;
6✔
671
  }
672
}
5✔
673

674
///////////////////////////////////////////////////////////////////////////////
675
// Private helper functions
676

677
void
678
fastq::post_process(const fastq_encoding& encoding)
1,208✔
679
{
680
  for (char& nuc : m_sequence) {
25,947✔
681
    // Fast ASCII letter uppercase
682
    switch (nuc &= 0xDF) {
7,041✔
683
      case 'A':
7,037✔
684
      case 'C':
7,037✔
685
      case 'G':
7,037✔
686
      case 'T':
7,037✔
687
      case 'N':
7,037✔
688
        break;
7,037✔
689

690
      default:
4✔
691
        throw fastq_error("invalid character in FASTQ sequence; "
12✔
692
                          "only A, C, G, T and N are expected!");
8✔
693
    }
694
  }
695

696
  encoding.decode(m_qualities);
1,204✔
697
}
1,192✔
698

699
fastq::ntrimmed
700
fastq::trim_sequence_and_qualities(const size_t left_inclusive,
64✔
701
                                   const size_t right_exclusive)
702
{
703
  const ntrimmed summary(left_inclusive, length() - right_exclusive);
192✔
704
  truncate(left_inclusive, right_exclusive - left_inclusive);
64✔
705

706
  return summary;
64✔
707
}
708

709
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc