• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #36

22 Jul 2024 09:33AM UTC coverage: 87.26% (-12.7%) from 100.0%
#36

push

travis-ci

MikkelSchubert
remove duplicate tests

2185 of 2504 relevant lines covered (87.26%)

16293.15 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.85
/src/fastq.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2011 by Stinus Lindgreen - stinus@binf.ku.dk            *
5
 * Copyright (C) 2014 by Mikkel Schubert - mikkelsch@gmail.com           *
6
 *                                                                       *
7
 * This program is free software: you can redistribute it and/or modify  *
8
 * it under the terms of the GNU General Public License as published by  *
9
 * the Free Software Foundation, either version 3 of the License, or     *
10
 * (at your option) any later version.                                   *
11
 *                                                                       *
12
 * This program is distributed in the hope that it will be useful,       *
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
15
 * GNU General Public License for more details.                          *
16
 *                                                                       *
17
 * You should have received a copy of the GNU General Public License     *
18
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
19
\*************************************************************************/
20
#include "fastq.hpp"
21
#include "debug.hpp"      // for AR_REQUIRE, AR_FAIL
22
#include "errors.hpp"     // for fastq_error
23
#include "linereader.hpp" // for line_reader_base
24
#include <algorithm>      // for reverse, count, max, min
25
#include <cmath>          // for log10, pow
26
#include <numeric>        // for accumulate
27
#include <sstream>        // for ostringstream
28

29
namespace adapterremoval {
30

31
const size_t ACGT::size;
32
const std::array<ACGT::value_type, ACGT::size> ACGT::values = { 'A',
33
                                                                'C',
34
                                                                'G',
35
                                                                'T' };
36

37
const size_t ACGTN::size;
38
const std::array<ACGTN::value_type, ACGTN::size> ACGTN::values = { 'A',
39
                                                                   'C',
40
                                                                   'G',
41
                                                                   'T',
42
                                                                   'N' };
43

44
namespace {
45

46
std::vector<double>
47
init_phred_to_p_values()
1✔
48
{
49
  std::vector<double> result;
1✔
50
  for (size_t i = PHRED_SCORE_MIN; i <= PHRED_SCORE_MAX; ++i) {
95✔
51
    result.push_back(std::pow(10.0, static_cast<double>(i) / -10.0));
188✔
52
  }
53

54
  return result;
1✔
55
}
×
56

57
const std::vector<double> g_phred_to_p = init_phred_to_p_values();
58

59
enum class read_mate
60
{
61
  unknown,
62
  mate_1,
63
  mate_2,
64
};
65

66
struct mate_info
240✔
67
{
68
  std::string desc() const
12✔
69
  {
70
    switch (mate) {
12✔
71
      case read_mate::unknown:
4✔
72
        return "unknown";
8✔
73
      case read_mate::mate_1:
5✔
74
        return "mate 1";
10✔
75
      case read_mate::mate_2:
3✔
76
        return "mate 2";
6✔
77
      default:
×
78
        AR_FAIL("Invalid mate in mate_info::desc");
×
79
    }
80
  }
81

82
  //! Read name without mate number or meta-data
83
  std::string name{};
84
  //! Which mate in a pair, if identified
85
  read_mate mate = read_mate::unknown;
86
  //! Position of the separator character in the header (if any)
87
  size_t sep_pos = std::string::npos;
88
};
89

90
mate_info
91
get_mate_info(const fastq& read, char mate_separator)
80✔
92
{
93
  const std::string& header = read.header();
80✔
94

95
  size_t pos = header.find_first_of(' ');
80✔
96
  if (pos == std::string::npos) {
80✔
97
    pos = header.length();
156✔
98
  }
99

100
  mate_info info;
80✔
101
  if (pos >= 2 && header.at(pos - 2) == mate_separator) {
160✔
102
    const char digit = header.at(pos - 1);
92✔
103

104
    if (digit == '1') {
46✔
105
      info.mate = read_mate::mate_1;
22✔
106
      pos -= 2;
22✔
107
      info.sep_pos = pos;
22✔
108
    } else if (digit == '2') {
24✔
109
      info.mate = read_mate::mate_2;
15✔
110
      pos -= 2;
15✔
111
      info.sep_pos = pos;
15✔
112
    }
113
  }
114

115
  info.name = header.substr(0, pos);
160✔
116
  return info;
80✔
117
}
×
118

119
size_t
120
count_poly_x_tail(const std::string& m_sequence,
75✔
121
                  const char nucleotide,
122
                  const size_t min_length)
123
{
124
  // Maximum number of sequential mismatches
125
  const size_t max_seq_mismatches = 2;
75✔
126
  // Number of called bases required per mismatch (via fastp)
127
  const size_t min_bases_per_mismatch = 8;
75✔
128

129
  //! Number of bases in the alignment to trim, excluding leading mismatches
130
  size_t n_trim = 0;
75✔
131
  //! Number of bases in the alignment
132
  size_t n_bases = 0;
75✔
133
  //! Number of uncalled bases (Ns) in the alignment
134
  size_t n_uncalled = 0;
75✔
135
  //! Number of mismatches in the alignment
136
  size_t n_mismatches = 0;
75✔
137
  //! Current number of sequential mismatches in the alignment
138
  size_t n_seq_mismatches = 0;
75✔
139

140
  for (auto it = m_sequence.rbegin(); it != m_sequence.rend(); ++it) {
1,852✔
141
    n_bases++;
440✔
142

143
    if (*it == nucleotide) {
880✔
144
      n_trim = n_bases;
145
      n_seq_mismatches = 0;
146
    } else if (*it == 'N') {
240✔
147
      n_uncalled++;
14✔
148
      // Trailing Ns are allowed only after a match
149
      if (!n_seq_mismatches) {
14✔
150
        n_trim = n_bases;
12✔
151
      }
152
    } else {
153
      n_mismatches++;
106✔
154
      n_seq_mismatches++;
106✔
155
      if (n_seq_mismatches > max_seq_mismatches ||
212✔
156
          n_mismatches > std::max(min_length, n_bases - n_uncalled) /
264✔
157
                           min_bases_per_mismatch) {
158
        // The final mismatch is not counted as part of the alignment
159
        n_bases--;
160
        break;
161
      }
162
    }
163
  }
164

165
  if (n_bases - n_uncalled >= min_length) {
75✔
166
    return n_trim;
28✔
167
  }
168

169
  return 0;
170
}
171

172
} // namespace
173

174
///////////////////////////////////////////////////////////////////////////////
175
// fastq
176

177
fastq::fastq()
147✔
178
  : m_header()
147✔
179
  , m_sequence()
147✔
180
  , m_qualities()
147✔
181
{
182
}
147✔
183

184
fastq::fastq(std::string header,
725✔
185
             std::string sequence,
186
             std::string qualities,
187
             const fastq_encoding& encoding)
725✔
188
  : m_header(std::move(header))
745✔
189
  , m_sequence(std::move(sequence))
745✔
190
  , m_qualities(std::move(qualities))
745✔
191
{
192
  if (m_qualities.length() != m_sequence.length()) {
2,175✔
193
    throw fastq_error(
12✔
194
      "invalid FASTQ record; sequence/quality length does not match");
8✔
195
  }
196

197
  post_process(encoding);
721✔
198
}
805✔
199

200
fastq::fastq(std::string header, std::string sequence)
474✔
201
  : m_header(std::move(header))
474✔
202
  , m_sequence(std::move(sequence))
474✔
203
  , m_qualities(std::string(m_sequence.length(), '!'))
1,422✔
204
{
205
  post_process(FASTQ_ENCODING_33);
474✔
206
}
474✔
207

208
bool
209
fastq::operator==(const fastq& other) const
240✔
210
{
211
  return (m_header == other.m_header) && (m_sequence == other.m_sequence) &&
480✔
212
         (m_qualities == other.m_qualities);
240✔
213
}
214

215
size_t
216
fastq::count_ns() const
6✔
217
{
218
  return static_cast<size_t>(
6✔
219
    std::count(m_sequence.begin(), m_sequence.end(), 'N'));
24✔
220
}
221

222
namespace {
223

224
/**
225
 * Calculate the absolute sequence complexity score, under the assumption that
226
 * the sequence does not contain Ns. Should the sequence contain Ns, then this
227
 * algorithm would overestimate the sequence complexity, and therefore returns
228
 * -1 to indicate failure.
229
 */
230
int
231
fast_calculate_complexity(const std::string& sequence)
14✔
232
{
233
  // The last base is not checked in the loop below
234
  if (sequence.back() == 'N') {
28✔
235
    return -1;
236
  }
237

238
  const size_t length = sequence.length() - 1;
10✔
239
  size_t i = 0;
10✔
240
  int score = 0;
10✔
241

242
  // Fixed block sizes allows gcc/clang to optimize the loop
243
  const size_t BLOCK_SIZE = 16;
10✔
244
  for (; i + BLOCK_SIZE < length; i += BLOCK_SIZE) {
10✔
245
    for (size_t j = 0; j < BLOCK_SIZE; ++j, ++i) {
×
246
      if (sequence[i] != sequence[i + 1]) {
×
247
        score++;
×
248
      }
249

250
      if (sequence[i] == 'N') {
×
251
        return -1;
252
      }
253
    }
254
  }
255

256
  for (; i < length; ++i) {
35✔
257
    if (sequence[i] != sequence[i + 1]) {
84✔
258
      score++;
21✔
259
    }
260

261
    if (sequence[i] == 'N') {
56✔
262
      return -1;
263
    }
264
  }
265

266
  return score;
267
}
268

269
} // namespace
270

271
double
272
fastq::complexity() const
17✔
273
{
274
  if (m_sequence.length() < 2) {
34✔
275
    return 0.0;
276
  }
277

278
  // Try to use unrolled/vectorized algorithm
279
  int score = fast_calculate_complexity(m_sequence);
14✔
280

281
  if (score < 0) {
14✔
282
    // If the sequence contains Ns then use the slower calculation, that does
283
    // not treat Ns as distinct bases and thereby does not inflate the score
284
    char prev = 'N';
7✔
285
    for (const auto nuc : m_sequence) {
140✔
286
      if (nuc != 'N' && nuc != prev) {
28✔
287
        prev = nuc;
14✔
288
        score++;
14✔
289
      }
290
    }
291
  }
292

293
  return std::max(0.0, score / static_cast<double>(m_sequence.length() - 1));
42✔
294
}
295

296
fastq::ntrimmed
297
fastq::trim_trailing_bases(const bool trim_ns,
9✔
298
                           char low_quality,
299
                           const bool preserve5p)
300
{
301
  low_quality += PHRED_OFFSET_MIN;
9✔
302
  auto is_quality_base = [&](size_t i) {
9✔
303
    return m_qualities.at(i) > low_quality &&
69✔
304
           (!trim_ns || m_sequence.at(i) != 'N');
38✔
305
  };
9✔
306

307
  size_t right_exclusive = 0;
9✔
308
  for (size_t i = m_sequence.length(); i; --i) {
30✔
309
    if (is_quality_base(i - 1)) {
19✔
310
      right_exclusive = i;
311
      break;
312
    }
313
  }
314

315
  size_t left_inclusive = 0;
9✔
316
  for (size_t i = 0; !preserve5p && i < right_exclusive; ++i) {
14✔
317
    if (is_quality_base(i)) {
10✔
318
      left_inclusive = i;
319
      break;
320
    }
321
  }
322

323
  return trim_sequence_and_qualities(left_inclusive, right_exclusive);
18✔
324
}
325

326
//! Calculates the size of the sliding window for quality trimming given a
327
//! read length and a user-defined window-size (fraction or whole number).
328
size_t
329
calculate_winlen(const size_t read_length, double window_size)
48✔
330
{
331
  if (window_size < 1.0) {
×
332
    window_size = window_size * static_cast<double>(read_length);
10✔
333
  }
334

335
  const auto winlen = static_cast<size_t>(window_size);
48✔
336
  if (winlen == 0 || winlen > read_length) {
48✔
337
    return read_length;
10✔
338
  }
339

340
  return winlen;
341
}
342

343
fastq::ntrimmed
344
fastq::trim_windowed_bases(const bool trim_ns,
53✔
345
                           char low_quality,
346
                           const double window_size,
347
                           const bool preserve5p)
348
{
349
  AR_REQUIRE(window_size >= 0.0);
61✔
350
  if (m_sequence.empty()) {
102✔
351
    return {};
6✔
352
  }
353

354
  low_quality += PHRED_OFFSET_MIN;
48✔
355
  auto is_quality_base = [&](size_t i) {
48✔
356
    return m_qualities.at(i) > low_quality &&
593✔
357
           (!trim_ns || m_sequence.at(i) != 'N');
266✔
358
  };
48✔
359

360
  const size_t winlen = calculate_winlen(length(), window_size);
96✔
361
  long running_sum =
48✔
362
    std::accumulate(m_qualities.begin(), m_qualities.begin() + winlen, 0);
192✔
363

364
  size_t left_inclusive = std::string::npos;
48✔
365
  size_t right_exclusive = std::string::npos;
48✔
366
  for (size_t offset = 0; offset + winlen <= length(); ++offset) {
538✔
367
    const long running_avg = running_sum / static_cast<long>(winlen);
261✔
368

369
    // We trim away low quality bases and Ns from the start of reads,
370
    // **before** we consider windows.
371
    if (left_inclusive == std::string::npos && is_quality_base(offset) &&
261✔
372
        running_avg > low_quality) {
42✔
373
      left_inclusive = offset;
374
    }
375

376
    if (left_inclusive != std::string::npos &&
261✔
377
        (running_avg <= low_quality || offset + winlen == length())) {
362✔
378
      right_exclusive = offset;
379
      while (right_exclusive < length() && is_quality_base(right_exclusive)) {
366✔
380
        right_exclusive++;
143✔
381
      }
382

383
      break;
384
    }
385

386
    running_sum -= m_qualities.at(offset);
442✔
387
    if (offset + winlen < length()) {
442✔
388
      running_sum += m_qualities.at(offset + winlen);
426✔
389
    }
390
  }
391

392
  if (left_inclusive == std::string::npos) {
48✔
393
    // No starting window found. Trim all bases starting from start.
394
    return trim_sequence_and_qualities(length(), length());
24✔
395
  } else if (preserve5p) {
40✔
396
    left_inclusive = 0;
3✔
397
  }
398

399
  AR_REQUIRE(right_exclusive != std::string::npos);
40✔
400
  return trim_sequence_and_qualities(left_inclusive, right_exclusive);
40✔
401
}
402

403
fastq::ntrimmed
404
fastq::mott_trimming(const double error_limit, const bool preserve5p)
7✔
405
{
406
  AR_REQUIRE(error_limit >= 0 && error_limit <= 1);
7✔
407

408
  size_t left_inclusive_temp = 0;
409
  size_t left_inclusive = 0;
410
  size_t right_exclusive = 0;
411

412
  double error_sum = 0.0;
413
  double error_sum_max = 0.0;
414

415
  for (size_t i = 0; i < length(); i++) {
190✔
416
    char phred = m_qualities.at(i) - PHRED_OFFSET_MIN;
176✔
417

418
    // Reduce weighting of very low-quality bases (inspired by seqtk) and
419
    // normalize Ns. The latter is not expected to matter for most data, but may
420
    // be relevant for some old/weird data and masked FASTQ reads.
421
    if (phred < 3 || m_sequence.at(i) == 'N') {
132✔
422
      phred = 3;
423
    }
424

425
    error_sum += error_limit - g_phred_to_p.at(phred);
176✔
426

427
    if (error_sum < 0.0) {
88✔
428
      // End of current segment (if any)
429
      left_inclusive_temp = i + 1;
52✔
430
      error_sum = 0;
52✔
431
    } else if (error_sum > error_sum_max) {
36✔
432
      // Extend best segment, possibly replacing the previous candidate
433
      left_inclusive = left_inclusive_temp;
28✔
434
      right_exclusive = i + 1;
28✔
435
      error_sum_max = error_sum;
28✔
436
    }
437
  }
438

439
  return trim_sequence_and_qualities(preserve5p ? 0 : left_inclusive,
13✔
440
                                     right_exclusive);
7✔
441
}
442

443
std::pair<char, size_t>
444
fastq::poly_x_trimming(const std::string& nucleotides, size_t min_length)
51✔
445
{
446
  size_t best_count = 0;
51✔
447
  char best_nucleotide = 'N';
51✔
448
  if (m_sequence.length() >= min_length && !nucleotides.empty()) {
146✔
449
    // Looping over all nucleotides ended up faster than a single pass algorithm
450
    for (const auto nucleotide : nucleotides) {
472✔
451
      const auto count = count_poly_x_tail(m_sequence, nucleotide, min_length);
75✔
452
      if (count > best_count) {
75✔
453
        best_nucleotide = nucleotide;
28✔
454
        best_count = count;
28✔
455
      }
456
    }
457

458
    truncate(0, length() - best_count);
86✔
459
  }
460

461
  return { best_nucleotide, best_count };
153✔
462
}
463

464
void
465
fastq::truncate(size_t pos, size_t len)
248✔
466
{
467
  AR_REQUIRE(pos == 0 || pos <= length());
303✔
468

469
  if (pos) {
247✔
470
    m_sequence.erase(0, pos);
50✔
471
    m_qualities.erase(0, pos);
50✔
472
  }
473

474
  if (len < length()) {
494✔
475
    m_sequence.erase(len);
128✔
476
    m_qualities.erase(len);
128✔
477
  }
478
}
247✔
479

480
void
481
fastq::reverse_complement()
2✔
482
{
483
  std::reverse(m_sequence.begin(), m_sequence.end());
6✔
484
  std::reverse(m_qualities.begin(), m_qualities.end());
6✔
485

486
  // Lookup table for complementary bases based only on the last 4 bits
487
  static const char complements[] = "-T-GA--C------N-";
2✔
488
  for (auto& nuc : m_sequence) {
38✔
489
    nuc = complements[nuc & 0xf];
10✔
490
  }
491
}
2✔
492

493
void
494
fastq::add_prefix_to_name(const std::string& prefix)
3✔
495
{
496
  if (!prefix.empty()) {
6✔
497
    m_header.insert(0, prefix);
2✔
498
  }
499
}
3✔
500

501
bool
502
fastq::read(line_reader_base& reader, const fastq_encoding& encoding)
28✔
503
{
504
  if (read_unsafe(reader)) {
28✔
505
    post_process(encoding);
10✔
506
    return true;
10✔
507
  }
508

509
  return false;
510
}
511

512
bool
513
fastq::read_unsafe(line_reader_base& reader)
28✔
514
{
515
  do {
35✔
516
    if (!reader.getline(m_header)) {
35✔
517
      // End of file; terminate gracefully
518
      return false;
519
    }
520
  } while (m_header.empty());
58✔
521

522
  if (m_header.size() < 2 || m_header.at(0) != '@') {
65✔
523
    throw fastq_error("Malformed or empty FASTQ header");
3✔
524
  } else {
525
    // FIXME: Erasing the '@' and then re-adding it later is pointless work
526
    m_header.erase(0, 1);
21✔
527
  }
528

529
  if (!reader.getline(m_sequence)) {
21✔
530
    throw fastq_error("partial FASTQ record; cut off after header");
3✔
531
  } else if (m_sequence.empty()) {
40✔
532
    throw fastq_error("sequence is empty");
6✔
533
  }
534

535
  // Most of the time this will only be '+' and not require an allocation
536
  std::string line;
18✔
537
  if (!reader.getline(line)) {
18✔
538
    throw fastq_error("partial FASTQ record; cut off after sequence");
3✔
539
  } else if (line.empty() || line.at(0) != '+') {
50✔
540
    throw fastq_error("FASTQ record lacks separator character (+)");
3✔
541
  }
542

543
  if (!reader.getline(m_qualities)) {
16✔
544
    throw fastq_error("partial FASTQ record; cut off after separator");
6✔
545
  } else if (m_qualities.length() != m_sequence.length()) {
42✔
546
    throw fastq_error("sequence/quality lengths do not match");
12✔
547
  }
548

549
  return true;
10✔
550
}
24✔
551

552
void
553
fastq::into_string(std::string& dst) const
2✔
554
{
555
  dst.push_back('@');
2✔
556
  dst.append(m_header);
2✔
557
  dst.push_back('\n');
2✔
558
  dst.append(m_sequence);
2✔
559
  dst.append("\n+\n", 3);
2✔
560
  dst.append(m_qualities);
2✔
561
  dst.push_back('\n');
2✔
562
}
2✔
563

564
///////////////////////////////////////////////////////////////////////////////
565
// Public helper functions
566

567
char
568
fastq::p_to_phred_33(double p)
×
569
{
570
  // Lowest possible error rate representable is '~' (~5e-10)
571
  const auto min_p = std::max(5e-10, p);
×
572
  const auto raw_score = static_cast<int>(-10.0 * std::log10(min_p));
×
573

574
  return std::min<int>(PHRED_OFFSET_MAX, PHRED_OFFSET_MIN + raw_score);
×
575
}
576

577
char
578
fastq::guess_mate_separator(const std::vector<fastq>& reads_1,
12✔
579
                            const std::vector<fastq>& reads_2)
580
{
581
  AR_REQUIRE(reads_1.size() == reads_2.size());
44✔
582

583
  // Commonly used characters
584
  const std::string candidates = "/.:";
20✔
585

586
  for (auto candidate : candidates) {
95✔
587
    auto it_1 = reads_1.begin();
19✔
588
    auto it_2 = reads_2.begin();
19✔
589

590
    bool any_failures = false;
19✔
591
    while (it_1 != reads_1.end()) {
75✔
592
      const auto info1 = get_mate_info(*it_1++, candidate);
60✔
593
      const auto info2 = get_mate_info(*it_2++, candidate);
60✔
594

595
      if (info1.name != info2.name) {
20✔
596
        any_failures = true;
12✔
597
        break;
12✔
598
      }
599

600
      const auto mate_1 = info1.mate;
8✔
601
      const auto mate_2 = info2.mate;
8✔
602

603
      if (mate_1 != read_mate::unknown || mate_2 != read_mate::unknown) {
8✔
604
        if (mate_1 == mate_2) {
7✔
605
          // This could be valid data that just happens to include a known
606
          // mate separator in the name. But this could also happen if the
607
          // same reads are used for both mate 1 and mate 2, so we cannot
608
          // safely guess.
609
          return 0;
610
        } else if (mate_1 != read_mate::mate_1 || mate_2 != read_mate::mate_2) {
6✔
611
          // The mate separator seems to be correct, but the mate information
612
          // does not match: One mate is missing information or the order is
613
          // wrong. Return the identified separator and raise an error later.
614
          return candidate;
1✔
615
        }
616
      }
617
    }
40✔
618

619
    if (!any_failures) {
12✔
620
      return candidate;
5✔
621
    }
622
  }
623

624
  return 0;
3✔
625
}
10✔
626

627
void
628
fastq::normalize_paired_reads(fastq& mate1, fastq& mate2, char mate_separator)
23✔
629
{
630
  if (mate1.length() == 0 || mate2.length() == 0) {
67✔
631
    throw fastq_error("Pair contains empty reads");
9✔
632
  }
633

634
  const auto info1 = get_mate_info(mate1, mate_separator);
20✔
635
  const auto info2 = get_mate_info(mate2, mate_separator);
20✔
636

637
  if (info1.name != info2.name) {
20✔
638
    std::ostringstream error;
9✔
639
    error << "Pair contains reads with mismatching names:\n"
9✔
640
          << " - '" << info1.name << "'\n"
641
          << " - '" << info2.name << "'";
27✔
642

643
    if (info1.mate == read_mate::unknown || info2.mate == read_mate::unknown) {
9✔
644
      error << "\n\nNote that AdapterRemoval by determines the mate "
8✔
645
               "numbers as the digit found at the end of the read name, "
646
               "if this is preceded by the character '"
647
            << mate_separator
648
            << "'; if these data makes use of a different character to "
649
               "separate the mate number from the read name, then you "
650
               "will need to set the --mate-separator command-line "
651
               "option to the appropriate character.";
8✔
652
    }
653

654
    throw fastq_error(error.str());
36✔
655
  }
9✔
656

657
  if (info1.mate != read_mate::unknown || info2.mate != read_mate::unknown) {
11✔
658
    if (info1.mate != read_mate::mate_1 || info2.mate != read_mate::mate_2) {
9✔
659
      std::ostringstream error;
6✔
660
      error << "Inconsistent mate numbering; please verify data:\n"
6✔
661
            << "\nRead 1 identified as " << info1.desc() << ": " << mate1.name()
24✔
662
            << "\nRead 2 identified as " << info2.desc() << ": "
12✔
663
            << mate2.name();
36✔
664

665
      throw fastq_error(error.str());
24✔
666
    }
6✔
667

668
    AR_REQUIRE(info1.sep_pos == info2.sep_pos);
3✔
669
    mate1.m_header.at(info1.sep_pos) = MATE_SEPARATOR;
6✔
670
    mate2.m_header.at(info2.sep_pos) = MATE_SEPARATOR;
6✔
671
  }
672
}
45✔
673

674
///////////////////////////////////////////////////////////////////////////////
675
// Private helper functions
676

677
void
678
fastq::post_process(const fastq_encoding& encoding)
1,205✔
679
{
680
  for (char& nuc : m_sequence) {
25,953✔
681
    // Fast ASCII letter uppercase
682
    switch (nuc &= 0xDF) {
7,047✔
683
      case 'A':
7,043✔
684
      case 'C':
7,043✔
685
      case 'G':
7,043✔
686
      case 'T':
7,043✔
687
      case 'N':
7,043✔
688
        break;
7,043✔
689

690
      default:
4✔
691
        throw fastq_error("invalid character in FASTQ sequence; "
12✔
692
                          "only A, C, G, T and N are expected!");
8✔
693
    }
694
  }
695

696
  encoding.decode(m_qualities);
1,201✔
697
}
1,189✔
698

699
fastq::ntrimmed
700
fastq::trim_sequence_and_qualities(const size_t left_inclusive,
64✔
701
                                   const size_t right_exclusive)
702
{
703
  const ntrimmed summary(left_inclusive, length() - right_exclusive);
192✔
704
  truncate(left_inclusive, right_exclusive - left_inclusive);
64✔
705

706
  return summary;
64✔
707
}
708

709
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc