• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #42

24 Aug 2024 03:21PM UTC coverage: 79.763% (+0.2%) from 79.602%
#42

push

travis-ci

MikkelSchubert
update changelog

add v2.3.4 and fix some minor issues

2286 of 2866 relevant lines covered (79.76%)

14253.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.86
/src/fastq.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2011 by Stinus Lindgreen - stinus@binf.ku.dk            *
5
 * Copyright (C) 2014 by Mikkel Schubert - mikkelsch@gmail.com           *
6
 *                                                                       *
7
 * This program is free software: you can redistribute it and/or modify  *
8
 * it under the terms of the GNU General Public License as published by  *
9
 * the Free Software Foundation, either version 3 of the License, or     *
10
 * (at your option) any later version.                                   *
11
 *                                                                       *
12
 * This program is distributed in the hope that it will be useful,       *
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
15
 * GNU General Public License for more details.                          *
16
 *                                                                       *
17
 * You should have received a copy of the GNU General Public License     *
18
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
19
\*************************************************************************/
20
#include "fastq.hpp"
21
#include "debug.hpp"      // for AR_REQUIRE, AR_FAIL
22
#include "errors.hpp"     // for fastq_error
23
#include "linereader.hpp" // for line_reader_base
24
#include <algorithm>      // for reverse, count, max, min
25
#include <cmath>          // for log10, pow
26
#include <numeric>        // for accumulate
27
#include <sstream>        // for ostringstream
28
#include <string_view>    // for string_view
29

30
namespace adapterremoval {
31

32
namespace {
33

34
std::vector<double>
35
init_phred_to_p_values()
1✔
36
{
37
  std::vector<double> result;
1✔
38
  for (size_t i = PHRED_SCORE_MIN; i <= PHRED_SCORE_MAX; ++i) {
95✔
39
    result.push_back(std::pow(10.0, static_cast<double>(i) / -10.0));
188✔
40
  }
41

42
  return result;
1✔
43
}
×
44

45
const std::vector<double> g_phred_to_p = init_phred_to_p_values();
46

47
//! Position of a read within a pair, as identified from the read header
enum class read_mate
{
  //! No mate number was identified
  unknown,
  //! The header ended in the digit '1'
  mate_1,
  //! The header ended in the digit '2'
  mate_2,
};
53

54
struct mate_info
55
{
56
  std::string_view desc() const
12✔
57
  {
58
    switch (mate) {
12✔
59
      case read_mate::unknown:
4✔
60
        return "unknown";
8✔
61
      case read_mate::mate_1:
5✔
62
        return "mate 1";
10✔
63
      case read_mate::mate_2:
3✔
64
        return "mate 2";
6✔
65
      default:
×
66
        AR_FAIL("Invalid mate in mate_info::desc");
×
67
    }
68
  }
69

70
  //! Read name without mate number or meta-data
71
  std::string_view name{};
72
  //! Which mate in a pair, if identified
73
  read_mate mate = read_mate::unknown;
74
  //! Position of the separator character in the header (if any)
75
  size_t sep_pos = std::string::npos;
76
};
77

78
//! Splits the header of `read` into a name and, if present, a mate number.
//!
//! The name ends at the first space in the header (or at the end of the
//! header). If the name ends with `mate_separator` followed by '1' or '2', then
//! those two characters are excluded from the name and the mate number and the
//! position of the separator are recorded.
mate_info
get_mate_info(const fastq& read, char mate_separator)
{
  const std::string_view header = read.header();

  const size_t first_space = header.find_first_of(' ');
  size_t name_end =
    (first_space == std::string::npos) ? header.length() : first_space;

  mate_info result;
  if (name_end >= 2 && header.at(name_end - 2) == mate_separator) {
    read_mate identified = read_mate::unknown;
    switch (header.at(name_end - 1)) {
      case '1':
        identified = read_mate::mate_1;
        break;
      case '2':
        identified = read_mate::mate_2;
        break;
      default:
        break;
    }

    // Only strip the suffix if it carries a valid mate number
    if (identified != read_mate::unknown) {
      name_end -= 2;
      result.mate = identified;
      result.sep_pos = name_end;
    }
  }

  result.name = header.substr(0, name_end);
  return result;
}
106

107
//! Returns the number of bases to trim from the 3' end of `m_sequence` in
//! order to remove a tail consisting (mostly) of `nucleotide`, or 0 if fewer
//! than `min_length` called bases were aligned against the tail.
size_t
count_poly_x_tail(const std::string& m_sequence,
                  const char nucleotide,
                  const size_t min_length)
{
  // Maximum number of sequential mismatches
  constexpr size_t max_mismatch_run = 2;
  // Number of called bases required per mismatch (via fastp)
  constexpr size_t called_bases_per_mismatch = 8;

  //! Number of bases to trim; never ends on a mismatch
  size_t trimmable = 0;
  //! Number of bases examined so far, counted from the 3' end
  size_t aligned = 0;
  //! Number of Ns among the examined bases
  size_t uncalled = 0;
  //! Total number of mismatches among the examined bases
  size_t mismatches = 0;
  //! Length of the current run of consecutive mismatches
  size_t mismatch_run = 0;

  for (size_t idx = m_sequence.length(); idx > 0; --idx) {
    const char base = m_sequence[idx - 1];
    ++aligned;

    if (base == nucleotide) {
      trimmable = aligned;
      mismatch_run = 0;
      continue;
    }

    if (base == 'N') {
      ++uncalled;
      // Ns extend the tail only when they do not follow a mismatch
      if (mismatch_run == 0) {
        trimmable = aligned;
      }
      continue;
    }

    ++mismatches;
    ++mismatch_run;

    const size_t allowed_mismatches =
      std::max(min_length, aligned - uncalled) / called_bases_per_mismatch;
    if (mismatch_run > max_mismatch_run || mismatches > allowed_mismatches) {
      // The mismatch that ended the alignment is not part of it
      --aligned;
      break;
    }
  }

  return (aligned - uncalled >= min_length) ? trimmable : 0;
}
159

160
} // namespace
161

162
///////////////////////////////////////////////////////////////////////////////
163
// fastq
164

165
fastq::fastq()
148✔
166
  : m_header("@")
296✔
167
  , m_sequence()
148✔
168
  , m_qualities()
148✔
169
{
170
}
148✔
171

172
fastq::fastq(std::string_view header,
1,210✔
173
             std::string sequence,
174
             std::string qualities,
175
             const fastq_encoding& encoding)
1,210✔
176
  : m_header()
1,230✔
177
  , m_sequence(std::move(sequence))
1,230✔
178
  , m_qualities(std::move(qualities))
1,230✔
179
{
180
  if (header.empty() || header.front() != '@') {
2,418✔
181
    m_header.push_back('@');
1,209✔
182
  }
183

184
  m_header.append(header);
1,210✔
185
  if (m_qualities.length() != m_sequence.length()) {
3,630✔
186
    throw fastq_error(
12✔
187
      "invalid FASTQ record; sequence/quality length does not match");
8✔
188
  }
189

190
  post_process(encoding);
1,206✔
191
}
1,290✔
192

193
fastq::fastq(std::string_view header, std::string sequence)
474✔
194
  : fastq(header, sequence, std::string(sequence.length(), '!'))
2,370✔
195
{
196
}
474✔
197

198
bool
199
fastq::operator==(const fastq& other) const
238✔
200
{
201
  return (m_header == other.m_header) && (m_sequence == other.m_sequence) &&
476✔
202
         (m_qualities == other.m_qualities);
238✔
203
}
204

205
std::string_view
206
fastq::name(const char mate_separator) const
24✔
207
{
208
  AR_REQUIRE(!m_header.empty() && m_header.front() == '@');
72✔
209

210
  std::string_view header = m_header;
24✔
211
  const size_t pos = header.find_first_of(' ');
24✔
212
  if (pos != std::string::npos) {
24✔
213
    header = header.substr(1, pos - 1);
2✔
214
  } else {
215
    header = header.substr(1);
22✔
216
  }
217

218
  if (mate_separator && header.size() > 1 &&
6✔
219
      (header.back() == '1' || header.back() == '2') &&
34✔
220
      header.at(header.length() - 2) == mate_separator) {
8✔
221
    header = header.substr(0, header.length() - 2);
2✔
222
  }
223

224
  return header;
24✔
225
}
226

227
size_t
228
fastq::count_ns() const
6✔
229
{
230
  return static_cast<size_t>(
6✔
231
    std::count(m_sequence.begin(), m_sequence.end(), 'N'));
24✔
232
}
233

234
namespace {
235

236
/**
 * Calculate the absolute sequence complexity score, i.e. the number of
 * positions at which a base differs from the following base, under the
 * assumption that the sequence does not contain Ns. Should the sequence
 * contain Ns, then this algorithm would overestimate the sequence complexity,
 * and therefore returns -1 to indicate failure.
 *
 * An empty sequence contains no adjacent bases and yields a score of 0.
 */
int
fast_calculate_complexity(const std::string& sequence)
{
  // Calling back() on an empty string is undefined behavior
  if (sequence.empty()) {
    return 0;
  }

  // The last base is not checked in the loop below
  if (sequence.back() == 'N') {
    return -1;
  }

  const size_t length = sequence.length() - 1;
  size_t i = 0;
  int score = 0;

  // Fixed block sizes allows gcc/clang to optimize the loop
  const size_t BLOCK_SIZE = 16;
  // The inner loop advances `i` by BLOCK_SIZE, so the outer loop must not
  // advance it as well; doing so would skip every second block
  while (i + BLOCK_SIZE < length) {
    for (size_t j = 0; j < BLOCK_SIZE; ++j, ++i) {
      if (sequence[i] != sequence[i + 1]) {
        score++;
      }

      if (sequence[i] == 'N') {
        return -1;
      }
    }
  }

  // Remaining positions that do not fill a whole block
  for (; i < length; ++i) {
    if (sequence[i] != sequence[i + 1]) {
      score++;
    }

    if (sequence[i] == 'N') {
      return -1;
    }
  }

  return score;
}
280

281
} // namespace
282

283
double
284
fastq::complexity() const
17✔
285
{
286
  if (m_sequence.length() < 2) {
34✔
287
    return 0.0;
288
  }
289

290
  // Try to use unrolled/vectorized algorithm
291
  int score = fast_calculate_complexity(m_sequence);
14✔
292

293
  if (score < 0) {
14✔
294
    // If the sequence contains Ns then use the slower calculation, that does
295
    // not treat Ns as distinct bases and thereby does not inflate the score
296
    char prev = 'N';
7✔
297
    for (const auto nuc : m_sequence) {
140✔
298
      if (nuc != 'N' && nuc != prev) {
28✔
299
        prev = nuc;
14✔
300
        score++;
14✔
301
      }
302
    }
303
  }
304

305
  return std::max(0.0, score / static_cast<double>(m_sequence.length() - 1));
42✔
306
}
307

308
double
309
fastq::mean_quality() const
8✔
310
{
311
  AR_REQUIRE(!m_qualities.empty());
20✔
312
  int64_t sum = -PHRED_OFFSET_MIN * static_cast<int64_t>(m_qualities.length());
7✔
313
  for (const auto c : m_qualities) {
3,136✔
314
    sum += c;
1,036✔
315
  }
316

317
  return static_cast<double>(sum) / static_cast<double>(m_qualities.length());
14✔
318
}
319

320
//! Trims bases with a quality score of `low_quality` or less from the 3' end
//! and, unless `preserve5p` is set, from the 5' end. If `trim_ns` is set, then
//! Ns are trimmed as well. Returns the result of trim_sequence_and_qualities.
fastq::ntrimmed
fastq::trim_trailing_bases(const bool trim_ns,
                           char low_quality,
                           const bool preserve5p)
{
  // Convert the score to the encoded form used by m_qualities
  low_quality += PHRED_OFFSET_MIN;
  const auto should_keep = [&](size_t idx) {
    if (m_qualities.at(idx) <= low_quality) {
      return false;
    }

    return !trim_ns || m_sequence.at(idx) != 'N';
  };

  // Walk inwards from the 3' end until a base worth keeping is found
  size_t right_exclusive = m_sequence.length();
  while (right_exclusive && !should_keep(right_exclusive - 1)) {
    --right_exclusive;
  }

  // Walk inwards from the 5' end; this stops no later than the last kept base
  size_t left_inclusive = 0;
  if (!preserve5p) {
    while (left_inclusive < right_exclusive && !should_keep(left_inclusive)) {
      ++left_inclusive;
    }
  }

  return trim_sequence_and_qualities(left_inclusive, right_exclusive);
}
349

350
//! Calculates the size of the sliding window for quality trimming given a
//! read length and a user-defined window-size (fraction or whole number).
//!
//! Values below 1 are interpreted as a fraction of `read_length`; values of 1
//! or more are truncated to a whole number of bases. The read length is
//! returned if the resulting window would be empty, would be longer than the
//! read, or if `window_size` is not a number.
size_t
calculate_winlen(const size_t read_length, double window_size)
{
  if (window_size < 1.0) {
    window_size = window_size * static_cast<double>(read_length);
  }

  // The checks are carried out on the floating point value, since converting
  // NaN or a value outside the range of size_t to an integer is undefined
  // behavior. The first check is written so that it also catches NaN.
  const bool is_empty_window = !(window_size >= 1.0);
  if (is_empty_window || window_size >= static_cast<double>(read_length)) {
    return read_length;
  }

  // 1 <= window_size < read_length, so the conversion is well defined
  return static_cast<size_t>(window_size);
}
366

367
//! Trims the read using a sliding window of encoded quality scores.
//!
//! The retained region starts at the first base that passes the quality / N
//! filter and whose window has an average quality above `low_quality`. It ends
//! where the window average drops to `low_quality` or below, or at the last
//! window, extended across any directly following bases that pass the filter.
//! If no start is found, every base is trimmed; if `preserve5p` is set, the 5'
//! end of the read is never trimmed.
fastq::ntrimmed
fastq::trim_windowed_bases(const bool trim_ns,
                           char low_quality,
                           const double window_size,
                           const bool preserve5p)
{
  AR_REQUIRE(window_size >= 0.0);
  if (m_sequence.empty()) {
    return {};
  }

  // Convert the score to the encoded form used by m_qualities
  low_quality += PHRED_OFFSET_MIN;
  const auto should_keep = [&](size_t idx) {
    if (m_qualities.at(idx) <= low_quality) {
      return false;
    }

    return !trim_ns || m_sequence.at(idx) != 'N';
  };

  const size_t n_bases = length();
  const size_t winlen = calculate_winlen(n_bases, window_size);

  // Sum of the encoded quality characters in the current window
  long window_sum =
    std::accumulate(m_qualities.begin(), m_qualities.begin() + winlen, 0);

  size_t first_kept = std::string::npos;
  size_t right_exclusive = std::string::npos;
  for (size_t start = 0; start + winlen <= n_bases; ++start) {
    const long window_avg = window_sum / static_cast<long>(winlen);
    const bool window_passes = window_avg > low_quality;
    const bool is_last_window = (start + winlen == n_bases);

    // We trim away low quality bases and Ns from the start of reads,
    // **before** we consider windows.
    if (first_kept == std::string::npos && should_keep(start) &&
        window_passes) {
      first_kept = start;
    }

    if (first_kept != std::string::npos &&
        (!window_passes || is_last_window)) {
      // Extend the retained region across bases that pass the filter
      right_exclusive = start;
      while (right_exclusive < n_bases && should_keep(right_exclusive)) {
        right_exclusive++;
      }

      break;
    }

    // Slide the window one base towards the 3' end
    window_sum -= m_qualities.at(start);
    if (!is_last_window) {
      window_sum += m_qualities.at(start + winlen);
    }
  }

  if (first_kept == std::string::npos) {
    // No starting window found. Trim all bases starting from start.
    return trim_sequence_and_qualities(n_bases, n_bases);
  }

  AR_REQUIRE(right_exclusive != std::string::npos);
  return trim_sequence_and_qualities(preserve5p ? 0 : first_kept,
                                     right_exclusive);
}
426

427
//! Trims the read down to the segment that maximizes the running sum of
//! (error_limit - P(error)) over its bases. If `preserve5p` is set, then only
//! the 3' end is trimmed. `error_limit` must be in the range 0 to 1.
fastq::ntrimmed
fastq::mott_trimming(const double error_limit, const bool preserve5p)
{
  AR_REQUIRE(error_limit >= 0 && error_limit <= 1);

  //! Start of the segment currently being extended
  size_t candidate_start = 0;
  //! Bounds of the best segment found so far
  size_t best_start = 0;
  size_t best_end = 0;

  double running_score = 0.0;
  double best_score = 0.0;

  const size_t n_bases = length();
  for (size_t i = 0; i < n_bases; ++i) {
    char phred = m_qualities.at(i) - PHRED_OFFSET_MIN;

    // Reduce weighting of very low-quality bases (inspired by seqtk) and
    // normalize Ns. The latter is not expected to matter for most data, but may
    // be relevant for some old/weird data and masked FASTQ reads.
    if (phred < 3 || m_sequence.at(i) == 'N') {
      phred = 3;
    }

    // NOTE(review): the score is used directly as the index, which presumably
    // relies on the table starting at a score of 0 -- confirm
    running_score += error_limit - g_phred_to_p.at(phred);

    if (running_score < 0.0) {
      // End of current segment (if any)
      candidate_start = i + 1;
      running_score = 0;
      continue;
    }

    if (running_score > best_score) {
      // Extend best segment, possibly replacing the previous candidate
      best_start = candidate_start;
      best_end = i + 1;
      best_score = running_score;
    }
  }

  const size_t left_inclusive = preserve5p ? 0 : best_start;
  return trim_sequence_and_qualities(left_inclusive, best_end);
}
466

467
std::pair<char, size_t>
468
fastq::poly_x_trimming(const std::string& nucleotides, size_t min_length)
51✔
469
{
470
  size_t best_count = 0;
51✔
471
  char best_nucleotide = 'N';
51✔
472
  if (m_sequence.length() >= min_length && !nucleotides.empty()) {
146✔
473
    // Looping over all nucleotides ended up faster than a single pass algorithm
474
    for (const auto nucleotide : nucleotides) {
472✔
475
      const auto count = count_poly_x_tail(m_sequence, nucleotide, min_length);
75✔
476
      if (count > best_count) {
75✔
477
        best_nucleotide = nucleotide;
28✔
478
        best_count = count;
28✔
479
      }
480
    }
481

482
    truncate(0, length() - best_count);
86✔
483
  }
484

485
  return { best_nucleotide, best_count };
153✔
486
}
487

488
void
489
fastq::truncate(size_t pos, size_t len)
248✔
490
{
491
  AR_REQUIRE(pos == 0 || pos <= length());
303✔
492

493
  if (pos) {
247✔
494
    m_sequence.erase(0, pos);
50✔
495
    m_qualities.erase(0, pos);
50✔
496
  }
497

498
  if (len < length()) {
494✔
499
    m_sequence.erase(len);
128✔
500
    m_qualities.erase(len);
128✔
501
  }
502
}
247✔
503

504
void
505
fastq::reverse_complement()
2✔
506
{
507
  std::reverse(m_sequence.begin(), m_sequence.end());
6✔
508
  std::reverse(m_qualities.begin(), m_qualities.end());
6✔
509

510
  // Lookup table for complementary bases based only on the last 4 bits
511
  static const char complements[] = "-T-GA--C------N-";
2✔
512
  for (auto& nuc : m_sequence) {
38✔
513
    nuc = complements[nuc & 0xf];
10✔
514
  }
515
}
2✔
516

517
void
518
fastq::add_prefix_to_name(const std::string& prefix)
3✔
519
{
520
  AR_REQUIRE(!m_header.empty());
6✔
521
  if (!prefix.empty()) {
6✔
522
    m_header.insert(1, prefix);
2✔
523
  }
524
}
3✔
525

526
bool
527
fastq::read(line_reader_base& reader, const fastq_encoding& encoding)
28✔
528
{
529
  if (read_unsafe(reader)) {
28✔
530
    post_process(encoding);
10✔
531
    return true;
10✔
532
  }
533

534
  return false;
535
}
536

537
bool
538
fastq::read_unsafe(line_reader_base& reader)
28✔
539
{
540
  do {
35✔
541
    if (!reader.getline(m_header)) {
35✔
542
      // End of file; terminate gracefully
543
      return false;
544
    }
545
  } while (m_header.empty());
58✔
546

547
  if (m_header.size() < 2 || m_header.at(0) != '@') {
65✔
548
    throw fastq_error("Malformed or empty FASTQ header");
3✔
549
  }
550

551
  if (!reader.getline(m_sequence)) {
21✔
552
    throw fastq_error("partial FASTQ record; cut off after header");
3✔
553
  } else if (m_sequence.empty()) {
40✔
554
    throw fastq_error("sequence is empty");
6✔
555
  }
556

557
  // Most of the time this will only be '+' and not require an allocation
558
  std::string line;
18✔
559
  if (!reader.getline(line)) {
18✔
560
    throw fastq_error("partial FASTQ record; cut off after sequence");
3✔
561
  } else if (line.empty() || line.at(0) != '+') {
50✔
562
    throw fastq_error("FASTQ record lacks separator character (+)");
3✔
563
  }
564

565
  if (!reader.getline(m_qualities)) {
16✔
566
    throw fastq_error("partial FASTQ record; cut off after separator");
6✔
567
  } else if (m_qualities.length() != m_sequence.length()) {
42✔
568
    throw fastq_error("sequence/quality lengths do not match");
12✔
569
  }
570

571
  return true;
10✔
572
}
24✔
573

574
///////////////////////////////////////////////////////////////////////////////
575
// Public helper functions
576

577
char
578
fastq::p_to_phred_33(double p)
×
579
{
580
  // Lowest possible error rate representable is '~' (~5e-10)
581
  const auto min_p = std::max(5e-10, p);
×
582
  const auto raw_score = static_cast<int>(-10.0 * std::log10(min_p));
×
583

584
  return std::min<int>(PHRED_OFFSET_MAX, PHRED_OFFSET_MIN + raw_score);
×
585
}
586

587
char
588
fastq::guess_mate_separator(const std::vector<fastq>& reads_1,
12✔
589
                            const std::vector<fastq>& reads_2)
590
{
591
  AR_REQUIRE(reads_1.size() == reads_2.size());
44✔
592

593
  // Commonly used characters
594
  const std::string candidates = "/.:";
20✔
595

596
  for (auto candidate : candidates) {
95✔
597
    auto it_1 = reads_1.begin();
19✔
598
    auto it_2 = reads_2.begin();
19✔
599

600
    bool any_failures = false;
19✔
601
    while (it_1 != reads_1.end()) {
75✔
602
      const auto info1 = get_mate_info(*it_1++, candidate);
60✔
603
      const auto info2 = get_mate_info(*it_2++, candidate);
60✔
604

605
      if (info1.name != info2.name) {
20✔
606
        any_failures = true;
12✔
607
        break;
12✔
608
      }
609

610
      const auto mate_1 = info1.mate;
8✔
611
      const auto mate_2 = info2.mate;
8✔
612

613
      if (mate_1 != read_mate::unknown || mate_2 != read_mate::unknown) {
8✔
614
        if (mate_1 == mate_2) {
7✔
615
          // This could be valid data that just happens to include a known
616
          // mate separator in the name. But this could also happen if the
617
          // same reads are used for both mate 1 and mate 2, so we cannot
618
          // safely guess.
619
          return 0;
2✔
620
        } else if (mate_1 != read_mate::mate_1 || mate_2 != read_mate::mate_2) {
6✔
621
          // The mate separator seems to be correct, but the mate information
622
          // does not match: One mate is missing information or the order is
623
          // wrong. Return the identified separator and raise an error later.
624
          return candidate;
1✔
625
        }
626
      }
627
    }
628

629
    if (!any_failures) {
12✔
630
      return candidate;
5✔
631
    }
632
  }
633

634
  return 0;
3✔
635
}
10✔
636

637
void
638
fastq::normalize_paired_reads(fastq& mate1, fastq& mate2, char mate_separator)
23✔
639
{
640
  if (mate1.length() == 0 || mate2.length() == 0) {
67✔
641
    throw fastq_error("Pair contains empty reads");
9✔
642
  }
643

644
  const auto info1 = get_mate_info(mate1, mate_separator);
20✔
645
  const auto info2 = get_mate_info(mate2, mate_separator);
20✔
646

647
  if (info1.name != info2.name) {
20✔
648
    std::ostringstream error;
9✔
649
    error << "Pair contains reads with mismatching names:\n"
9✔
650
          << " - '" << info1.name << "'\n"
651
          << " - '" << info2.name << "'";
27✔
652

653
    if (info1.mate == read_mate::unknown || info2.mate == read_mate::unknown) {
9✔
654
      error << "\n\nNote that AdapterRemoval by determines the mate "
8✔
655
               "numbers as the digit found at the end of the read name, "
656
               "if this is preceded by";
8✔
657

658
      if (mate_separator) {
8✔
659
        error << "the character '" << mate_separator << "'";
8✔
660
      } else {
661
        error << "a character such as '/'";
×
662
      }
663

664
      error << "; if these data makes use of a different character to "
8✔
665
               "separate the mate number from the read name, then you "
666
               "will need to set the --mate-separator command-line "
667
               "option to the appropriate character.";
8✔
668
    }
669

670
    throw fastq_error(error.str());
36✔
671
  }
9✔
672

673
  if (info1.mate != read_mate::unknown || info2.mate != read_mate::unknown) {
11✔
674
    if (info1.mate != read_mate::mate_1 || info2.mate != read_mate::mate_2) {
9✔
675
      std::ostringstream error;
6✔
676
      error << "Inconsistent mate numbering; please verify data:\n"
6✔
677
            << "\nRead 1 identified as " << info1.desc() << ": " << mate1.name()
6✔
678
            << "\nRead 2 identified as " << info2.desc() << ": "
679
            << mate2.name();
24✔
680

681
      throw fastq_error(error.str());
24✔
682
    }
6✔
683

684
    AR_REQUIRE(info1.sep_pos == info2.sep_pos);
3✔
685
    mate1.m_header.at(info1.sep_pos) = MATE_SEPARATOR;
6✔
686
    mate2.m_header.at(info2.sep_pos) = MATE_SEPARATOR;
6✔
687
  }
688
}
5✔
689

690
///////////////////////////////////////////////////////////////////////////////
691
// Private helper functions
692

693
void
694
fastq::post_process(const fastq_encoding& encoding)
1,216✔
695
{
696
  encoding.process_nucleotides(m_sequence);
1,216✔
697
  encoding.process_qualities(m_qualities);
1,212✔
698
}
1,190✔
699

700
//! Truncates the read to the half-open interval from `left_inclusive` to
//! `right_exclusive`, and returns the number of bases removed from the 5' and
//! from the 3' end, respectively.
fastq::ntrimmed
fastq::trim_sequence_and_qualities(const size_t left_inclusive,
                                   const size_t right_exclusive)
{
  // The counts must be determined before the read is shortened
  const size_t trimmed_5p = left_inclusive;
  const size_t trimmed_3p = length() - right_exclusive;
  const ntrimmed summary(trimmed_5p, trimmed_3p);

  const size_t n_retained = right_exclusive - left_inclusive;
  truncate(left_inclusive, n_retained);

  return summary;
}
709

710
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc