• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #39

20 Aug 2024 01:31PM UTC coverage: 79.602% (-3.8%) from 83.365%
#39

push

travis-ci

MikkelSchubert
update schema URL to use github pages

2279 of 2863 relevant lines covered (79.6%)

14257.45 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.91
/src/fastq.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2011 by Stinus Lindgreen - stinus@binf.ku.dk            *
5
 * Copyright (C) 2014 by Mikkel Schubert - mikkelsch@gmail.com           *
6
 *                                                                       *
7
 * This program is free software: you can redistribute it and/or modify  *
8
 * it under the terms of the GNU General Public License as published by  *
9
 * the Free Software Foundation, either version 3 of the License, or     *
10
 * (at your option) any later version.                                   *
11
 *                                                                       *
12
 * This program is distributed in the hope that it will be useful,       *
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
15
 * GNU General Public License for more details.                          *
16
 *                                                                       *
17
 * You should have received a copy of the GNU General Public License     *
18
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
19
\*************************************************************************/
20
#include "fastq.hpp"
21
#include "debug.hpp"      // for AR_REQUIRE, AR_FAIL
22
#include "errors.hpp"     // for fastq_error
23
#include "linereader.hpp" // for line_reader_base
24
#include <algorithm>      // for reverse, count, max, min
25
#include <cmath>          // for log10, pow
26
#include <numeric>        // for accumulate
27
#include <sstream>        // for ostringstream
28
#include <string_view>    // for string_view
29

30
namespace adapterremoval {
31

32
namespace {
33

34
std::vector<double>
35
init_phred_to_p_values()
1✔
36
{
37
  std::vector<double> result;
1✔
38
  for (size_t i = PHRED_SCORE_MIN; i <= PHRED_SCORE_MAX; ++i) {
95✔
39
    result.push_back(std::pow(10.0, static_cast<double>(i) / -10.0));
188✔
40
  }
41

42
  return result;
1✔
43
}
×
44

45
const std::vector<double> g_phred_to_p = init_phred_to_p_values();
46

47
enum class read_mate
48
{
49
  unknown,
50
  mate_1,
51
  mate_2,
52
};
53

54
struct mate_info
55
{
56
  std::string_view desc() const
12✔
57
  {
58
    switch (mate) {
12✔
59
      case read_mate::unknown:
4✔
60
        return "unknown";
8✔
61
      case read_mate::mate_1:
5✔
62
        return "mate 1";
10✔
63
      case read_mate::mate_2:
3✔
64
        return "mate 2";
6✔
65
      default:
×
66
        AR_FAIL("Invalid mate in mate_info::desc");
×
67
    }
68
  }
69

70
  //! Read name without mate number or meta-data
71
  std::string_view name{};
72
  //! Which mate in a pair, if identified
73
  read_mate mate = read_mate::unknown;
74
  //! Position of the separator character in the header (if any)
75
  size_t sep_pos = std::string::npos;
76
};
77

78
mate_info
79
get_mate_info(const fastq& read, char mate_separator)
80✔
80
{
81
  const std::string_view header = read.header();
160✔
82

83
  size_t pos = header.find_first_of(' ');
80✔
84
  if (pos == std::string::npos) {
80✔
85
    pos = header.length();
78✔
86
  }
87

88
  mate_info info;
80✔
89
  if (pos >= 2 && header.at(pos - 2) == mate_separator) {
160✔
90
    const char digit = header.at(pos - 1);
92✔
91

92
    if (digit == '1') {
46✔
93
      info.mate = read_mate::mate_1;
22✔
94
      pos -= 2;
22✔
95
      info.sep_pos = pos;
22✔
96
    } else if (digit == '2') {
24✔
97
      info.mate = read_mate::mate_2;
15✔
98
      pos -= 2;
15✔
99
      info.sep_pos = pos;
15✔
100
    }
101
  }
102

103
  info.name = header.substr(0, pos);
80✔
104
  return info;
80✔
105
}
106

107
size_t
108
count_poly_x_tail(const std::string& m_sequence,
75✔
109
                  const char nucleotide,
110
                  const size_t min_length)
111
{
112
  // Maximum number of sequential mismatches
113
  const size_t max_seq_mismatches = 2;
75✔
114
  // Number of called bases required per mismatch (via fastp)
115
  const size_t min_bases_per_mismatch = 8;
75✔
116

117
  //! Number of bases in the alignment to trim, excluding leading mismatches
118
  size_t n_trim = 0;
75✔
119
  //! Number of bases in the alignment
120
  size_t n_bases = 0;
75✔
121
  //! Number of uncalled bases (Ns) in the alignment
122
  size_t n_uncalled = 0;
75✔
123
  //! Number of mismatches in the alignment
124
  size_t n_mismatches = 0;
75✔
125
  //! Current number of sequential mismatches in the alignment
126
  size_t n_seq_mismatches = 0;
75✔
127

128
  for (auto it = m_sequence.rbegin(); it != m_sequence.rend(); ++it) {
1,852✔
129
    n_bases++;
440✔
130

131
    if (*it == nucleotide) {
880✔
132
      n_trim = n_bases;
133
      n_seq_mismatches = 0;
134
    } else if (*it == 'N') {
240✔
135
      n_uncalled++;
14✔
136
      // Trailing Ns are allowed only after a match
137
      if (!n_seq_mismatches) {
14✔
138
        n_trim = n_bases;
12✔
139
      }
140
    } else {
141
      n_mismatches++;
106✔
142
      n_seq_mismatches++;
106✔
143
      if (n_seq_mismatches > max_seq_mismatches ||
212✔
144
          n_mismatches > std::max(min_length, n_bases - n_uncalled) /
264✔
145
                           min_bases_per_mismatch) {
146
        // The final mismatch is not counted as part of the alignment
147
        n_bases--;
148
        break;
149
      }
150
    }
151
  }
152

153
  if (n_bases - n_uncalled >= min_length) {
75✔
154
    return n_trim;
28✔
155
  }
156

157
  return 0;
158
}
159

160
} // namespace
161

162
///////////////////////////////////////////////////////////////////////////////
163
// fastq
164

165
fastq::fastq()
147✔
166
  : m_header()
147✔
167
  , m_sequence()
147✔
168
  , m_qualities()
147✔
169
{
170
}
147✔
171

172
fastq::fastq(std::string header,
728✔
173
             std::string sequence,
174
             std::string qualities,
175
             const fastq_encoding& encoding)
728✔
176
  : m_header(std::move(header))
748✔
177
  , m_sequence(std::move(sequence))
748✔
178
  , m_qualities(std::move(qualities))
748✔
179
{
180
  if (m_qualities.length() != m_sequence.length()) {
2,184✔
181
    throw fastq_error(
12✔
182
      "invalid FASTQ record; sequence/quality length does not match");
8✔
183
  }
184

185
  post_process(encoding);
724✔
186
}
808✔
187

188
fastq::fastq(std::string header, std::string sequence)
474✔
189
  : m_header(std::move(header))
474✔
190
  , m_sequence(std::move(sequence))
474✔
191
  , m_qualities(std::string(m_sequence.length(), '!'))
1,422✔
192
{
193
  post_process(FASTQ_ENCODING_33);
474✔
194
}
474✔
195

196
bool
197
fastq::operator==(const fastq& other) const
238✔
198
{
199
  return (m_header == other.m_header) && (m_sequence == other.m_sequence) &&
476✔
200
         (m_qualities == other.m_qualities);
238✔
201
}
202

203
std::string_view
204
fastq::name(const char mate_separator) const
24✔
205
{
206
  std::string_view header = m_header;
24✔
207
  const size_t pos = header.find_first_of(' ');
24✔
208
  if (pos != std::string::npos) {
24✔
209
    header = header.substr(0, pos);
2✔
210
  }
211

212
  if (mate_separator && header.size() > 1 &&
30✔
213
      (header.back() == '1' || header.back() == '2') &&
34✔
214
      header.at(header.length() - 2) == mate_separator) {
8✔
215
    header = header.substr(0, header.length() - 2);
2✔
216
  }
217

218
  return header;
24✔
219
}
220

221
size_t
222
fastq::count_ns() const
6✔
223
{
224
  return static_cast<size_t>(
6✔
225
    std::count(m_sequence.begin(), m_sequence.end(), 'N'));
24✔
226
}
227

228
namespace {
229

230
/**
231
 * Calculate the absolute sequence complexity score, under the assumption that
232
 * the sequence does not contain Ns. Should the sequence contain Ns, then this
233
 * algorithm would overestimate the sequence complexity, and therefore returns
234
 * -1 to indicate failure.
235
 */
236
int
237
fast_calculate_complexity(const std::string& sequence)
14✔
238
{
239
  // The last base is not checked in the loop below
240
  if (sequence.back() == 'N') {
28✔
241
    return -1;
242
  }
243

244
  const size_t length = sequence.length() - 1;
10✔
245
  size_t i = 0;
10✔
246
  int score = 0;
10✔
247

248
  // Fixed block sizes allows gcc/clang to optimize the loop
249
  const size_t BLOCK_SIZE = 16;
10✔
250
  for (; i + BLOCK_SIZE < length; i += BLOCK_SIZE) {
10✔
251
    for (size_t j = 0; j < BLOCK_SIZE; ++j, ++i) {
×
252
      if (sequence[i] != sequence[i + 1]) {
×
253
        score++;
×
254
      }
255

256
      if (sequence[i] == 'N') {
×
257
        return -1;
258
      }
259
    }
260
  }
261

262
  for (; i < length; ++i) {
35✔
263
    if (sequence[i] != sequence[i + 1]) {
84✔
264
      score++;
21✔
265
    }
266

267
    if (sequence[i] == 'N') {
56✔
268
      return -1;
269
    }
270
  }
271

272
  return score;
273
}
274

275
} // namespace
276

277
double
278
fastq::complexity() const
17✔
279
{
280
  if (m_sequence.length() < 2) {
34✔
281
    return 0.0;
282
  }
283

284
  // Try to use unrolled/vectorized algorithm
285
  int score = fast_calculate_complexity(m_sequence);
14✔
286

287
  if (score < 0) {
14✔
288
    // If the sequence contains Ns then use the slower calculation, that does
289
    // not treat Ns as distinct bases and thereby does not inflate the score
290
    char prev = 'N';
7✔
291
    for (const auto nuc : m_sequence) {
140✔
292
      if (nuc != 'N' && nuc != prev) {
28✔
293
        prev = nuc;
14✔
294
        score++;
14✔
295
      }
296
    }
297
  }
298

299
  return std::max(0.0, score / static_cast<double>(m_sequence.length() - 1));
42✔
300
}
301

302
double
303
fastq::mean_quality() const
×
304
{
305
  int64_t total = 0;
×
306
  AR_REQUIRE(!m_qualities.empty());
×
307
  for (const auto c : m_qualities) {
×
308
    total += c - PHRED_OFFSET_MIN;
×
309
  }
310

311
  return static_cast<double>(total) / static_cast<double>(m_qualities.length());
×
312
}
313

314
fastq::ntrimmed
315
fastq::trim_trailing_bases(const bool trim_ns,
9✔
316
                           char low_quality,
317
                           const bool preserve5p)
318
{
319
  low_quality += PHRED_OFFSET_MIN;
9✔
320
  auto is_quality_base = [&](size_t i) {
9✔
321
    return m_qualities.at(i) > low_quality &&
69✔
322
           (!trim_ns || m_sequence.at(i) != 'N');
38✔
323
  };
9✔
324

325
  size_t right_exclusive = 0;
9✔
326
  for (size_t i = m_sequence.length(); i; --i) {
30✔
327
    if (is_quality_base(i - 1)) {
19✔
328
      right_exclusive = i;
329
      break;
330
    }
331
  }
332

333
  size_t left_inclusive = 0;
9✔
334
  for (size_t i = 0; !preserve5p && i < right_exclusive; ++i) {
14✔
335
    if (is_quality_base(i)) {
10✔
336
      left_inclusive = i;
337
      break;
338
    }
339
  }
340

341
  return trim_sequence_and_qualities(left_inclusive, right_exclusive);
18✔
342
}
343

344
//! Calculates the size of the sliding window for quality trimming given a
345
//! read length and a user-defined window-size (fraction or whole number).
346
size_t
347
calculate_winlen(const size_t read_length, double window_size)
48✔
348
{
349
  if (window_size < 1.0) {
×
350
    window_size = window_size * static_cast<double>(read_length);
10✔
351
  }
352

353
  const auto winlen = static_cast<size_t>(window_size);
48✔
354
  if (winlen == 0 || winlen > read_length) {
48✔
355
    return read_length;
10✔
356
  }
357

358
  return winlen;
359
}
360

361
fastq::ntrimmed
362
fastq::trim_windowed_bases(const bool trim_ns,
53✔
363
                           char low_quality,
364
                           const double window_size,
365
                           const bool preserve5p)
366
{
367
  AR_REQUIRE(window_size >= 0.0);
61✔
368
  if (m_sequence.empty()) {
102✔
369
    return {};
6✔
370
  }
371

372
  low_quality += PHRED_OFFSET_MIN;
48✔
373
  auto is_quality_base = [&](size_t i) {
48✔
374
    return m_qualities.at(i) > low_quality &&
593✔
375
           (!trim_ns || m_sequence.at(i) != 'N');
266✔
376
  };
48✔
377

378
  const size_t winlen = calculate_winlen(length(), window_size);
96✔
379
  long running_sum =
48✔
380
    std::accumulate(m_qualities.begin(), m_qualities.begin() + winlen, 0);
192✔
381

382
  size_t left_inclusive = std::string::npos;
48✔
383
  size_t right_exclusive = std::string::npos;
48✔
384
  for (size_t offset = 0; offset + winlen <= length(); ++offset) {
538✔
385
    const long running_avg = running_sum / static_cast<long>(winlen);
261✔
386

387
    // We trim away low quality bases and Ns from the start of reads,
388
    // **before** we consider windows.
389
    if (left_inclusive == std::string::npos && is_quality_base(offset) &&
261✔
390
        running_avg > low_quality) {
42✔
391
      left_inclusive = offset;
392
    }
393

394
    if (left_inclusive != std::string::npos &&
261✔
395
        (running_avg <= low_quality || offset + winlen == length())) {
362✔
396
      right_exclusive = offset;
397
      while (right_exclusive < length() && is_quality_base(right_exclusive)) {
366✔
398
        right_exclusive++;
143✔
399
      }
400

401
      break;
402
    }
403

404
    running_sum -= m_qualities.at(offset);
442✔
405
    if (offset + winlen < length()) {
442✔
406
      running_sum += m_qualities.at(offset + winlen);
426✔
407
    }
408
  }
409

410
  if (left_inclusive == std::string::npos) {
48✔
411
    // No starting window found. Trim all bases starting from start.
412
    return trim_sequence_and_qualities(length(), length());
24✔
413
  } else if (preserve5p) {
40✔
414
    left_inclusive = 0;
3✔
415
  }
416

417
  AR_REQUIRE(right_exclusive != std::string::npos);
40✔
418
  return trim_sequence_and_qualities(left_inclusive, right_exclusive);
40✔
419
}
420

421
fastq::ntrimmed
422
fastq::mott_trimming(const double error_limit, const bool preserve5p)
7✔
423
{
424
  AR_REQUIRE(error_limit >= 0 && error_limit <= 1);
7✔
425

426
  size_t left_inclusive_temp = 0;
427
  size_t left_inclusive = 0;
428
  size_t right_exclusive = 0;
429

430
  double error_sum = 0.0;
431
  double error_sum_max = 0.0;
432

433
  for (size_t i = 0; i < length(); i++) {
190✔
434
    char phred = m_qualities.at(i) - PHRED_OFFSET_MIN;
176✔
435

436
    // Reduce weighting of very low-quality bases (inspired by seqtk) and
437
    // normalize Ns. The latter is not expected to matter for most data, but may
438
    // be relevant for some old/weird data and masked FASTQ reads.
439
    if (phred < 3 || m_sequence.at(i) == 'N') {
132✔
440
      phred = 3;
441
    }
442

443
    error_sum += error_limit - g_phred_to_p.at(phred);
176✔
444

445
    if (error_sum < 0.0) {
88✔
446
      // End of current segment (if any)
447
      left_inclusive_temp = i + 1;
52✔
448
      error_sum = 0;
52✔
449
    } else if (error_sum > error_sum_max) {
36✔
450
      // Extend best segment, possibly replacing the previous candidate
451
      left_inclusive = left_inclusive_temp;
28✔
452
      right_exclusive = i + 1;
28✔
453
      error_sum_max = error_sum;
28✔
454
    }
455
  }
456

457
  return trim_sequence_and_qualities(preserve5p ? 0 : left_inclusive,
13✔
458
                                     right_exclusive);
7✔
459
}
460

461
std::pair<char, size_t>
462
fastq::poly_x_trimming(const std::string& nucleotides, size_t min_length)
51✔
463
{
464
  size_t best_count = 0;
51✔
465
  char best_nucleotide = 'N';
51✔
466
  if (m_sequence.length() >= min_length && !nucleotides.empty()) {
146✔
467
    // Looping over all nucleotides ended up faster than a single pass algorithm
468
    for (const auto nucleotide : nucleotides) {
472✔
469
      const auto count = count_poly_x_tail(m_sequence, nucleotide, min_length);
75✔
470
      if (count > best_count) {
75✔
471
        best_nucleotide = nucleotide;
28✔
472
        best_count = count;
28✔
473
      }
474
    }
475

476
    truncate(0, length() - best_count);
86✔
477
  }
478

479
  return { best_nucleotide, best_count };
153✔
480
}
481

482
void
483
fastq::truncate(size_t pos, size_t len)
248✔
484
{
485
  AR_REQUIRE(pos == 0 || pos <= length());
303✔
486

487
  if (pos) {
247✔
488
    m_sequence.erase(0, pos);
50✔
489
    m_qualities.erase(0, pos);
50✔
490
  }
491

492
  if (len < length()) {
494✔
493
    m_sequence.erase(len);
128✔
494
    m_qualities.erase(len);
128✔
495
  }
496
}
247✔
497

498
void
499
fastq::reverse_complement()
2✔
500
{
501
  std::reverse(m_sequence.begin(), m_sequence.end());
6✔
502
  std::reverse(m_qualities.begin(), m_qualities.end());
6✔
503

504
  // Lookup table for complementary bases based only on the last 4 bits
505
  static const char complements[] = "-T-GA--C------N-";
2✔
506
  for (auto& nuc : m_sequence) {
38✔
507
    nuc = complements[nuc & 0xf];
10✔
508
  }
509
}
2✔
510

511
void
512
fastq::add_prefix_to_name(const std::string& prefix)
3✔
513
{
514
  if (!prefix.empty()) {
6✔
515
    m_header.insert(0, prefix);
2✔
516
  }
517
}
3✔
518

519
bool
520
fastq::read(line_reader_base& reader, const fastq_encoding& encoding)
28✔
521
{
522
  if (read_unsafe(reader)) {
28✔
523
    post_process(encoding);
10✔
524
    return true;
10✔
525
  }
526

527
  return false;
528
}
529

530
bool
531
fastq::read_unsafe(line_reader_base& reader)
28✔
532
{
533
  do {
35✔
534
    if (!reader.getline(m_header)) {
35✔
535
      // End of file; terminate gracefully
536
      return false;
537
    }
538
  } while (m_header.empty());
58✔
539

540
  if (m_header.size() < 2 || m_header.at(0) != '@') {
65✔
541
    throw fastq_error("Malformed or empty FASTQ header");
3✔
542
  } else {
543
    // FIXME: Erasing the '@' and then re-adding it later is pointless work
544
    m_header.erase(0, 1);
21✔
545
  }
546

547
  if (!reader.getline(m_sequence)) {
21✔
548
    throw fastq_error("partial FASTQ record; cut off after header");
3✔
549
  } else if (m_sequence.empty()) {
40✔
550
    throw fastq_error("sequence is empty");
6✔
551
  }
552

553
  // Most of the time this will only be '+' and not require an allocation
554
  std::string line;
18✔
555
  if (!reader.getline(line)) {
18✔
556
    throw fastq_error("partial FASTQ record; cut off after sequence");
3✔
557
  } else if (line.empty() || line.at(0) != '+') {
50✔
558
    throw fastq_error("FASTQ record lacks separator character (+)");
3✔
559
  }
560

561
  if (!reader.getline(m_qualities)) {
16✔
562
    throw fastq_error("partial FASTQ record; cut off after separator");
6✔
563
  } else if (m_qualities.length() != m_sequence.length()) {
42✔
564
    throw fastq_error("sequence/quality lengths do not match");
12✔
565
  }
566

567
  return true;
10✔
568
}
24✔
569

570
///////////////////////////////////////////////////////////////////////////////
571
// Public helper functions
572

573
char
574
fastq::p_to_phred_33(double p)
×
575
{
576
  // Lowest possible error rate representable is '~' (~5e-10)
577
  const auto min_p = std::max(5e-10, p);
×
578
  const auto raw_score = static_cast<int>(-10.0 * std::log10(min_p));
×
579

580
  return std::min<int>(PHRED_OFFSET_MAX, PHRED_OFFSET_MIN + raw_score);
×
581
}
582

583
char
584
fastq::guess_mate_separator(const std::vector<fastq>& reads_1,
12✔
585
                            const std::vector<fastq>& reads_2)
586
{
587
  AR_REQUIRE(reads_1.size() == reads_2.size());
44✔
588

589
  // Commonly used characters
590
  const std::string candidates = "/.:";
20✔
591

592
  for (auto candidate : candidates) {
95✔
593
    auto it_1 = reads_1.begin();
19✔
594
    auto it_2 = reads_2.begin();
19✔
595

596
    bool any_failures = false;
19✔
597
    while (it_1 != reads_1.end()) {
75✔
598
      const auto info1 = get_mate_info(*it_1++, candidate);
60✔
599
      const auto info2 = get_mate_info(*it_2++, candidate);
60✔
600

601
      if (info1.name != info2.name) {
20✔
602
        any_failures = true;
12✔
603
        break;
12✔
604
      }
605

606
      const auto mate_1 = info1.mate;
8✔
607
      const auto mate_2 = info2.mate;
8✔
608

609
      if (mate_1 != read_mate::unknown || mate_2 != read_mate::unknown) {
8✔
610
        if (mate_1 == mate_2) {
7✔
611
          // This could be valid data that just happens to include a known
612
          // mate separator in the name. But this could also happen if the
613
          // same reads are used for both mate 1 and mate 2, so we cannot
614
          // safely guess.
615
          return 0;
2✔
616
        } else if (mate_1 != read_mate::mate_1 || mate_2 != read_mate::mate_2) {
6✔
617
          // The mate separator seems to be correct, but the mate information
618
          // does not match: One mate is missing information or the order is
619
          // wrong. Return the identified separator and raise an error later.
620
          return candidate;
1✔
621
        }
622
      }
623
    }
624

625
    if (!any_failures) {
12✔
626
      return candidate;
5✔
627
    }
628
  }
629

630
  return 0;
3✔
631
}
10✔
632

633
void
634
fastq::normalize_paired_reads(fastq& mate1, fastq& mate2, char mate_separator)
23✔
635
{
636
  if (mate1.length() == 0 || mate2.length() == 0) {
67✔
637
    throw fastq_error("Pair contains empty reads");
9✔
638
  }
639

640
  const auto info1 = get_mate_info(mate1, mate_separator);
20✔
641
  const auto info2 = get_mate_info(mate2, mate_separator);
20✔
642

643
  if (info1.name != info2.name) {
20✔
644
    std::ostringstream error;
9✔
645
    error << "Pair contains reads with mismatching names:\n"
9✔
646
          << " - '" << info1.name << "'\n"
647
          << " - '" << info2.name << "'";
27✔
648

649
    if (info1.mate == read_mate::unknown || info2.mate == read_mate::unknown) {
9✔
650
      error << "\n\nNote that AdapterRemoval by determines the mate "
8✔
651
               "numbers as the digit found at the end of the read name, "
652
               "if this is preceded by";
8✔
653

654
      if (mate_separator) {
8✔
655
        error << "the character '" << mate_separator << "'";
8✔
656
      } else {
657
        error << "a character such as '/'";
×
658
      }
659

660
      error << "; if these data makes use of a different character to "
8✔
661
               "separate the mate number from the read name, then you "
662
               "will need to set the --mate-separator command-line "
663
               "option to the appropriate character.";
8✔
664
    }
665

666
    throw fastq_error(error.str());
36✔
667
  }
9✔
668

669
  if (info1.mate != read_mate::unknown || info2.mate != read_mate::unknown) {
11✔
670
    if (info1.mate != read_mate::mate_1 || info2.mate != read_mate::mate_2) {
9✔
671
      std::ostringstream error;
6✔
672
      error << "Inconsistent mate numbering; please verify data:\n"
6✔
673
            << "\nRead 1 identified as " << info1.desc() << ": " << mate1.name()
6✔
674
            << "\nRead 2 identified as " << info2.desc() << ": "
675
            << mate2.name();
24✔
676

677
      throw fastq_error(error.str());
24✔
678
    }
6✔
679

680
    AR_REQUIRE(info1.sep_pos == info2.sep_pos);
3✔
681
    mate1.m_header.at(info1.sep_pos) = MATE_SEPARATOR;
6✔
682
    mate2.m_header.at(info2.sep_pos) = MATE_SEPARATOR;
6✔
683
  }
684
}
5✔
685

686
///////////////////////////////////////////////////////////////////////////////
687
// Private helper functions
688

689
void
690
fastq::post_process(const fastq_encoding& encoding)
1,208✔
691
{
692
  encoding.process_nucleotides(m_sequence);
1,208✔
693
  encoding.process_qualities(m_qualities);
1,204✔
694
}
1,182✔
695

696
fastq::ntrimmed
697
fastq::trim_sequence_and_qualities(const size_t left_inclusive,
64✔
698
                                   const size_t right_exclusive)
699
{
700
  const ntrimmed summary(left_inclusive, length() - right_exclusive);
192✔
701
  truncate(left_inclusive, right_exclusive - left_inclusive);
64✔
702

703
  return summary;
64✔
704
}
705

706
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc