MikkelSchubert / adapterremoval / #89

13 Apr 2025 09:57AM UTC coverage: 27.054% (-0.4%) from 27.414%

Build #89 · push · travis-ci · web-flow
report individual barcode counts when multiple barcodes are used (#116)

* fix sample percentages in per-sample HTML table
* count per-barcode hits rather than per sample
* report individual barcode counts in JSON report
* report individual barcode counts in HTML report
* stack barcodes in demultiplexing barplot and identify in tooltip
* add button to show/hide multiple barcodes

0 of 74 new or added lines in 4 files covered. (0.0%)

626 existing lines in 4 files now uncovered.

2723 of 10065 relevant lines covered (27.05%)

4005.83 hits per line

Source File: /src/statistics.cpp (0.0% of relevant lines covered)
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: 2011 Stinus Lindgreen <stinus@binf.ku.dk>
// SPDX-FileCopyrightText: 2014 Mikkel Schubert <mikkelsch@gmail.com>
// SPDX-FileCopyrightText: 2010-17 Simon Andrews
#include "statistics.hpp"
#include "adapter_id.hpp" // for adapter_id_statistics
#include "debug.hpp"      // for AR_REQUIRE
#include "fastq.hpp"      // for ACGTN, fastq, ACGT
#include "fastq_enc.hpp"  // for PHRED_OFFSET_MIN, PHRED_SCORE_MAX
#include "simd.hpp"       // for size_t
#include "utilities.hpp"  // for prng_seed
#include <algorithm>      // for max, min
#include <cstdlib>        // for size_t
#include <memory>         // for make_shared, __shared_ptr_access, __shared_...
#include <string>         // for basic_string, string
#include <string_view>    // for string_view

namespace adapterremoval {

////////////////////////////////////////////////////////////////////////////////

const size_t DUPLICATION_LEVELS = 16;
const string_vec DUPLICATION_LABELS = { "1",    "2",   "3",   "4",
                                        "5",    "6",   "7",   "8",
                                        "9",    ">10", ">50", ">100",
                                        ">500", ">1k", ">5k", ">10k" };

size_t
duplication_level(size_t count)
{
  if (count > 10000) {
    return 15;
  } else if (count > 5000) {
    return 14;
  } else if (count > 1000) {
    return 13;
  } else if (count > 500) {
    return 12;
  } else if (count > 100) {
    return 11;
  } else if (count > 50) {
    return 10;
  } else if (count > 10) {
    return 9;
  } else {
    return count - 1;
  }
}
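
// Illustrative mapping (added for clarity; not part of the original file):
// the branches above place raw duplication counts into the DUPLICATION_LABELS
// bins, e.g.
//   duplication_level(1)     == 0   // bin "1"
//   duplication_level(9)     == 8   // bin "9"
//   duplication_level(10)    == 9   // bin ">10"
//   duplication_level(500)   == 11  // bin ">100"
//   duplication_level(20000) == 15  // bin ">10k"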

duplication_statistics::summary::summary()
  : labels(DUPLICATION_LABELS)
  , total_sequences(DUPLICATION_LEVELS)
  , unique_sequences(DUPLICATION_LEVELS)
  , unique_frac()
{
}

duplication_statistics::duplication_statistics(size_t max_unique_sequences)
  : m_max_unique_sequences(max_unique_sequences)
  , m_sequence_counts()
  , m_sequences_counted()
  , m_sequences_counted_at_max()
{
  m_sequence_counts.reserve(max_unique_sequences);
}

void
duplication_statistics::process(const fastq& read)
{
  if (m_max_unique_sequences) {
    if (read.length() > 75) {
      insert(read.sequence().substr(0, 50));
    } else {
      insert(read.sequence());
    }
  }
}

duplication_statistics::summary
duplication_statistics::summarize() const
{
  // Histogram of sequence duplication counts
  robin_hood::unordered_flat_map<size_t, size_t> histogram;
  for (const auto& it : m_sequence_counts) {
    histogram[it.second]++;
  }

  duplication_statistics::summary result;

  result.total_sequences.resize_up_to(DUPLICATION_LEVELS);
  result.unique_sequences.resize_up_to(DUPLICATION_LEVELS);

  for (const auto& it : histogram) {
    const auto level = it.first;
    const auto count = correct_count(level, it.second);

    const auto slot = duplication_level(level);
    result.total_sequences.inc(slot, count * level);
    result.unique_sequences.inc(slot, count);
  }

  const auto unique_count = result.unique_sequences.sum();
  const auto total_count = result.total_sequences.sum();

  if (total_count) {
    result.unique_frac = unique_count / static_cast<double>(total_count);
    result.total_sequences = result.total_sequences / total_count;
    result.unique_sequences = result.unique_sequences / unique_count;
  } else {
    result.unique_frac = 1.0;
  }

  return result;
}

double
duplication_statistics::correct_count(size_t bin, size_t count) const
{
  // Adapted from
  //   https://github.com/s-andrews/FastQC/blob/v0.11.9/uk/ac/babraham/FastQC/Modules/DuplicationLevel.java#L158

  // See if we can bail out early
  if (m_sequences_counted_at_max == m_sequences_counted) {
    return count;
  }

  // If there aren't enough sequences left to hide another sequence with this
  // count then we can also skip the calculation
  if (m_sequences_counted - count < m_sequences_counted_at_max) {
    return count;
  }

  // If not then we need to see what the likelihood is that we had another
  // sequence with this number of observations which we would have missed.

  // We'll start by working out the probability of NOT seeing a sequence with
  // this duplication level within the first countAtLimit sequences of
  // count.  This is easier than calculating the probability of
  // seeing it.
  double p_not_seeing_at_limit = 1.0;

  // To save doing long calculations which are never going to produce anything
  // meaningful we'll set a limit to our p-value calculation.  This is the
  // probability below which we won't increase our count by 0.01 of an
  // observation.  Once we're below this we stop caring about the corrected
  // value since it's going to be so close to the observed value that we can
  // just return that instead.
  const double limit_of_caring = 1.0 - (count / (count + 0.01));

  for (size_t i = 0; i < m_sequences_counted_at_max; i++) {
    p_not_seeing_at_limit *= ((m_sequences_counted - i) - bin) /
                             static_cast<double>(m_sequences_counted - i);

    if (p_not_seeing_at_limit < limit_of_caring) {
      p_not_seeing_at_limit = 0;
      break;
    }
  }

  // Scale by the chance of seeing
  return count / (1 - p_not_seeing_at_limit);
}
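
// Worked example (added for illustration; the numbers are hypothetical, not
// taken from the original file): with m_sequences_counted == 100 reads in
// total, of which m_sequences_counted_at_max == 50 were counted while the map
// still had room, correcting count == 10 distinct sequences observed at
// duplication level bin == 2 gives
//   p_not_seeing_at_limit = prod_{i=0..49} (100 - i - 2) / (100 - i)
//                         = (50 * 49) / (100 * 99) ≈ 0.2475
//   corrected count       = 10 / (1 - 0.2475) ≈ 13.3
// i.e. roughly a third more twice-seen sequences than were directly observed.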

void
duplication_statistics::insert(const std::string& key)
{
  m_sequences_counted++;

  const auto unique_seqs = m_sequence_counts.size();
  if (unique_seqs >= m_max_unique_sequences) {
    auto it = m_sequence_counts.find(key);
    if (it != m_sequence_counts.end()) {
      it->second++;
    }
  } else {
    m_sequence_counts[key]++;
    m_sequences_counted_at_max++;
  }
}

////////////////////////////////////////////////////////////////////////////////

namespace {

void
smoothed_gc_count(rates& distribution, size_t count, size_t length)
{
  // counts are smoothed across adjacent (percentage) bins
  const auto lower = std::max<double>(0, 100 * (count - 0.5) / length);
  const auto upper = std::min<double>(100, 100 * (count + 0.5) / length);

  const size_t lower_i = lower;
  const size_t upper_i = upper;

  const auto lower_f = lower - lower_i;
  const auto upper_f = upper - upper_i;

  if (lower_i == upper_i) {
    // Count falls in a single bin
    distribution.inc(lower_i, (upper - lower));
  } else {
    // Increment first bin/partially overlapped adjacent bins
    distribution.inc(lower_i, (1 - lower_f));
    distribution.inc(upper_i, upper_f);

    // Ranges are half open, except for the final bin
    const auto final_i = count == length ? 101 : upper_i;
    for (size_t i = lower + 1; i < final_i; ++i) {
      distribution.inc(i);
    }
  }
}

} // namespace
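
// Worked example (added for illustration; the numbers are hypothetical, not
// taken from the original file): a sampled read with count == 3 G/C bases out
// of length == 4 called bases has lower == 62.5 and upper == 87.5, so its
// weight is smeared over the 62-87% GC bins: 0.5 to bin 62, 1.0 to each of
// bins 63-86, and 0.5 to bin 87, for a total weight of 100 / length == 25.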

fastq_statistics::fastq_statistics(double sample_rate)
  : fastq_statistics(sample_rate, prng_seed())
{
}

fastq_statistics::fastq_statistics(double sample_rate, uint32_t seed)
  : m_sample_rate(sample_rate)
  , m_rng(seed)
  , m_quality_dist(PHRED_SCORE_MAX + 1)
  , m_gc_content_dist(101)
{
}

void
fastq_statistics::process(const fastq& read, size_t num_input_reads)
{
  m_number_of_input_reads += num_input_reads;
  m_number_of_output_reads++;

  const auto sequence_len = read.length();
  m_length_dist.resize_up_to(sequence_len + 1);
  m_length_dist.inc(sequence_len);

  if (std::generate_canonical<float, 32>(m_rng) <= m_sample_rate) {
    m_nucleotide_pos.resize_up_to(sequence_len);
    m_quality_pos.resize_up_to(sequence_len);

    m_number_of_sampled_reads++;

    const std::string_view sequence = read.sequence();
    const std::string_view qualities = read.qualities();

    indexed_count<ACGTN> nucls;
    for (size_t i = 0; i < sequence_len; ++i) {
      const auto nuc = sequence[i];

      nucls.inc(nuc);
      m_nucleotide_pos.inc_unsafe(nuc, i);

      const auto quality = qualities[i] - PHRED_OFFSET_MIN;
      m_quality_pos.inc_unsafe(nuc, i, quality);
      m_quality_dist.inc(quality);
    }

    auto n_at = nucls.get('A') + nucls.get('T');
    auto n_gc = nucls.get('G') + nucls.get('C');
    if (n_at || n_gc) {
      // uncalled bases are not considered for the length of the reads
      smoothed_gc_count(m_gc_content_dist, n_gc, n_gc + n_at);
    }
  }
}

fastq_statistics&
fastq_statistics::operator+=(const fastq_statistics& other)
{
  m_number_of_input_reads += other.m_number_of_input_reads;
  m_number_of_output_reads += other.m_number_of_output_reads;
  m_number_of_sampled_reads += other.m_number_of_sampled_reads;
  m_length_dist += other.m_length_dist;
  m_quality_dist += other.m_quality_dist;
  m_gc_content_dist += other.m_gc_content_dist;
  m_nucleotide_pos += other.m_nucleotide_pos;
  m_quality_pos += other.m_quality_pos;

  return *this;
}

trimming_statistics::trimming_statistics(double sample_rate)
  : singleton(std::make_shared<fastq_statistics>(sample_rate))
  , merged(std::make_shared<fastq_statistics>(sample_rate))
  , discarded(std::make_shared<fastq_statistics>(sample_rate))
{
  // Synchronize sampling of mate 1 and mate 2 reads
  const auto seed = prng_seed();
  read_1 = std::make_shared<fastq_statistics>(sample_rate, seed);
  read_2 = std::make_shared<fastq_statistics>(sample_rate, seed);
}

trimming_statistics&
trimming_statistics::operator+=(const trimming_statistics& other)
{
  *read_1 += *other.read_1;
  *read_2 += *other.read_2;
  *singleton += *other.singleton;
  *merged += *other.merged;
  *discarded += *other.discarded;

  insert_sizes += other.insert_sizes;
  adapter_trimmed_reads += other.adapter_trimmed_reads;
  adapter_trimmed_bases += other.adapter_trimmed_bases;
  reads_merged += other.reads_merged;
  terminal_pre_trimmed += other.terminal_pre_trimmed;
  terminal_post_trimmed += other.terminal_post_trimmed;
  poly_x_pre_trimmed_reads += other.poly_x_pre_trimmed_reads;
  poly_x_pre_trimmed_bases += other.poly_x_pre_trimmed_bases;
  poly_x_post_trimmed_reads += other.poly_x_post_trimmed_reads;
  poly_x_post_trimmed_bases += other.poly_x_post_trimmed_bases;
  low_quality_trimmed += other.low_quality_trimmed;
  total_trimmed += other.total_trimmed;
  filtered_min_length += other.filtered_min_length;
  filtered_max_length += other.filtered_max_length;
  filtered_ambiguous += other.filtered_ambiguous;
  filtered_mean_quality += other.filtered_mean_quality;
  filtered_low_complexity += other.filtered_low_complexity;

  return *this;
}

demux_statistics::demux_statistics(double sample_rate)
  : unidentified_stats_1(std::make_shared<fastq_statistics>(sample_rate))
  , unidentified_stats_2(std::make_shared<fastq_statistics>(sample_rate))
{
}

size_t
demux_statistics::total() const
{
  size_t total = unidentified + ambiguous;
  for (const auto& count : samples) {
    total += count.sum();
  }

  return total;
}

statistics::statistics(double sample_rate)
  : input_1(std::make_shared<fastq_statistics>(sample_rate))
  , input_2(std::make_shared<fastq_statistics>(sample_rate))
  , demultiplexing(std::make_shared<demux_statistics>(sample_rate))
{
}

statistics_builder::statistics_builder()
  : m_sample_count(0)
  , m_sample_rate(1.0)
  , m_max_unique(0)
  , m_adapter_id(0)
{
}

statistics_builder&
statistics_builder::sample_rate(double rate)
{
  m_sample_rate = rate;

  return *this;
}

statistics_builder&
statistics_builder::demultiplexing(size_t samples)
{
  m_sample_count = samples;

  return *this;
}

statistics_builder&
statistics_builder::estimate_duplication(size_t max_unique)
{
  m_max_unique = max_unique;

  return *this;
}

/** Enable adapter identification */
statistics_builder&
statistics_builder::adapter_identification(size_t max_length)
{
  m_adapter_id = max_length;

  return *this;
}

statistics
statistics_builder::initialize() const
{
  statistics stats(m_sample_rate);

  stats.duplication_1 = std::make_shared<duplication_statistics>(m_max_unique);
  stats.duplication_2 = std::make_shared<duplication_statistics>(m_max_unique);

  if (m_adapter_id) {
    stats.adapter_id = std::make_shared<adapter_id_statistics>(m_adapter_id);
  }

  return stats;
}

} // namespace adapterremoval
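
The statistics_builder above collects the configuration and initialize() assembles the statistics object; a minimal usage sketch, with hypothetical argument values and no claims about how the callers in this codebase actually configure it, might look like:

  // Configure and create the statistics bundle; each setter returns the
  // builder, so the calls can be chained before initialize() is invoked.
  statistics_builder builder;
  builder.sample_rate(0.1)           // sample roughly 10% of reads
    .demultiplexing(4)               // expect four demultiplexed samples
    .estimate_duplication(100000)    // track up to 100k unique sequences
    .adapter_identification(42);     // enable adapter identification
  const statistics stats = builder.initialize();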