• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #73

22 Mar 2025 10:19PM UTC coverage: 27.088% (-0.002%) from 27.09%
#73

push

travis-ci

web-flow
updates to formatting and licensing headers (#95)

* use SPDX headers for licenses

This reduces verbosity and works around an issue with clang-format where
some formatting would not be applied due to the \***\ headers.

* set AllowAllArgumentsOnNextLine and InsertBraces

This results in more consistent formatting using clang-format

18 of 61 new or added lines in 12 files covered. (29.51%)

343 existing lines in 3 files now uncovered.

2601 of 9602 relevant lines covered (27.09%)

4259.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/reports_html.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2022 Mikkel Schubert <mikkelsch@gmail.com>
3
#include "adapter_id.hpp"            // for adapter_id_statistics
4
#include "counts.hpp"                // for counts, indexed_count, counts_tmpl
5
#include "debug.hpp"                 // for AR_REQUIRE
6
#include "fastq.hpp"                 // for ACGT, ACGT::values, fastq, ACGTN
7
#include "json.hpp"                  // for json_dict, json_list, json_ptr
8
#include "logging.hpp"               // for log_stream, error
9
#include "main.hpp"                  // for VERSION, NAME
10
#include "managed_io.hpp"            // for managed_io
11
#include "output.hpp"                // for DEV_NULL, output_files
12
#include "reports.hpp"               // for write_html_report
13
#include "reports_template_html.hpp" // for html_frequency_plot, html_demultiple...
14
#include "sequence_sets.hpp"         // for adapter_set
15
#include "simd.hpp"                  // for size_t
16
#include "statistics.hpp"            // for fastq_stats_ptr, fastq_statistics
17
#include "strutils.hpp"              // for format_percentage, format_rough...
18
#include "userconfig.hpp"            // for userconfig, ar_command, DEV_NULL
19
#include <algorithm>                 // for max
20
#include <cctype>                    // for toupper
21
#include <cerrno>                    // for errno
22
#include <cmath>                     // for fmod
23
#include <cstdint>                   // for uint64_t
24
#include <cstring>                   // for size_t, strerror
25
#include <iomanip>                   // for operator<<, setprecision, setw
26
#include <memory>                    // for __shared_ptr_access, shared_ptr
27
#include <sstream>                   // for ostringstream
28
#include <string>                    // for string, operator==, to_string
29
#include <utility>                   // for pair
30
#include <vector>                    // for vector
31

32
namespace adapterremoval {
33

34
namespace {
35

36
using fastq_stats_vec = std::vector<fastq_stats_ptr>;
37
using template_ptr = std::unique_ptr<html_template>;
38

39
//! Width of full-size figures; picked so that two report pages fit next to
//! each other on a display that is 1920 wide
constexpr const char* FIGURE_WIDTH = "736";
//! Approximate width of each figure in a two-column facet plot
constexpr const char* FACET_WIDTH_2 = "351";
//! Approximate width of each figure in a one-column facet plot
constexpr const char* FACET_WIDTH_1 = FIGURE_WIDTH;
45

46
////////////////////////////////////////////////////////////////////////////////
47

48
/** Escapes a string that needs to be embedded in a JS */
49
std::string
50
json_encode(const std::string& s)
×
51
{
52
  return json_token::from_str(s)->to_string();
×
53
}
54

55
/** JSON escaped string */
56
std::string
57
operator""_json(const char* s, size_t length)
×
58
{
59
  return json_encode(std::string(s, length));
×
60
}
61

62
/**
 * Formats a duration as human readable text.
 *
 * Hours are included for durations of at least one hour and minutes for
 * durations of at least one minute; seconds are always included and are
 * written with one decimal, e.g. "1 hour,  2 minutes, and  5.0 seconds".
 *
 * @param seconds Duration in seconds.
 * @return The formatted duration.
 */
std::string
runtime_to_str(double seconds)
{
  std::ostringstream ss;

  if (seconds >= 3600.0) {
    // The field width applies to the minutes written below, so that one and
    // two digit values take up the same amount of space
    ss << static_cast<size_t>(seconds / 3600.0) << " "
       << (seconds >= 7200.0 ? "hours, " : "hour, ") << std::setw(2);
  }

  if (seconds >= 60.0) {
    // Minutes past the last whole hour; always in the range 0 to 59
    const auto minutes =
      static_cast<size_t>(std::fmod(seconds, 3600.0) / 60.0);

    // Only exactly one minute is singular; 0 and 2+ are plural
    ss << minutes << " " << (minutes == 1 ? "minute" : "minutes") << ", and "
       << std::setw(4);
  }

  ss << std::fixed << std::setprecision(1) << std::fmod(seconds, 60.0)
     << " seconds";

  return ss.str();
}
84

85
std::string
86
mean_of_bp_counts(const counts& count)
×
87
{
88
  auto reads = count.sum();
×
89
  auto bases = count.product();
×
90

91
  if (!reads) {
×
92
    return "NA";
×
93
  }
94

95
  if (bases % reads == 0) {
×
96
    return std::to_string(bases / reads) + " bp";
×
97
  }
98

99
  std::ostringstream ss;
×
100
  ss << std::fixed << std::setprecision(1)
×
101
     << (bases / static_cast<double>(reads)) << " bp";
×
102

103
  return ss.str();
×
104
}
105

106
/**
107
 * VEGA-lite will omit plots if there are no values; this function therefore
108
 * ensures that at least one value is written for a given measurement.
109
 */
110
template<typename T>
111
counts_tmpl<T>
112
require_values(counts_tmpl<T> r, T fallback = T())
×
113
{
114
  if (r.size()) {
×
115
    return r;
×
116
  }
117

118
  return counts_tmpl<T>({ fallback });
×
119
}
120

121
std::string
122
format_average_bases(const reads_and_bases& counts)
×
123
{
124
  const auto reads = counts.reads();
×
125

126
  if (reads) {
×
127
    return format_fraction(counts.bases(), reads, 1) + " bp";
×
128
  } else {
129
    return "NA";
×
130
  }
131
}
132

133
////////////////////////////////////////////////////////////////////////////////
134

135
class io_summary_writer
136
{
137
public:
138
  enum class io
139
  {
140
    input,
141
    output
142
  };
143

144
  io_summary_writer(std::ostream& output, const io type)
×
145
    : m_output(output)
×
146
    , m_type(type)
×
147

148
  {
149
  }
150

151
  void write_head(const std::string& title, const std::string& href)
×
152
  {
153
    html_summary_io_head().set_title(title).set_href(href).write(m_output);
×
154
  }
155

156
  void write_row(const std::string& title, const fastq_statistics& stats)
×
157
  {
158
    const auto n_reads = (m_type == io::input) ? stats.number_of_input_reads()
×
159
                                               : stats.number_of_output_reads();
×
160
    const auto total = stats.quality_dist().sum();
×
161

162
    html_summary_io_row()
×
163
      .set_name(title)
×
164
      .set_n_reads(format_rough_number(n_reads))
×
165
      .set_n_bases(format_rough_number(stats.length_dist().product()))
×
166
      .set_lengths(mean_of_bp_counts(stats.length_dist()))
×
167
      .set_q30(format_percentage(stats.quality_dist().sum(30), total))
×
168
      .set_q20(format_percentage(stats.quality_dist().sum(20), total))
×
169
      .set_ns(format_percentage(stats.nucleotides_pos('N').sum(), total))
×
170
      .set_gc(format_percentage(stats.nucleotides_gc_pos().sum(), total))
×
171
      .write(m_output);
×
172
  }
173

174
  void write_tail() { html_summary_io_tail().write(m_output); }
×
175

176
private:
177
  std::ostream& m_output;
178
  io m_type;
179
};
180

181
std::string
182
build_base_qualities(const fastq_stats_vec& reads, const string_vec& names)
×
183
{
184
  json_list qualities;
×
185

186
  for (size_t i = 0; i < reads.size(); ++i) {
×
187
    const auto& stats = *reads.at(i);
×
188

189
    auto total_quality = stats.qualities_pos();
×
190
    auto total_bases = stats.nucleotides_pos();
×
191

192
    for (const auto nucleotide : ACGT::values) {
×
193
      const auto nucleotides = stats.nucleotides_pos(nucleotide);
×
194
      const auto quality = stats.qualities_pos(nucleotide);
×
195

196
      auto dict = qualities.dict();
×
197
      dict->str("read", names.at(i));
×
198
      dict->i64("offset", 1);
×
199
      dict->str("group", std::string(1, ::toupper(nucleotide)));
×
200
      dict->f64_vec("y", quality / nucleotides);
×
201
    }
202

203
    auto dict = qualities.dict();
×
204
    dict->str("read", names.at(i));
×
205
    dict->i64("offset", 1);
×
206
    dict->str("group", "Mean");
×
207

208
    // Ensure that values get written, to prevent the plot being omitted
209
    dict->f64_vec("y", require_values(total_quality / total_bases));
×
210
  }
211

212
  return qualities.to_string();
×
213
}
214

215
std::string
216
build_quality_distribution(const fastq_stats_vec& reads,
×
217
                           const string_vec& names)
218
{
219
  json_list data;
×
220

221
  for (size_t i = 0; i < reads.size(); ++i) {
×
222
    const auto& stats = reads.at(i);
×
223
    auto count = stats->quality_dist().trim();
×
224
    // A max that should give a uniform look to most data
225
    count.resize_up_to(44);
×
226

227
    const auto m = data.dict();
×
228
    m->str("group", names.at(i));
×
229
    m->i64("offset", 0);
×
230
    m->i64_vec("y", count);
×
231
  }
232

233
  return data.to_string();
×
234
}
235

236
std::string
237
build_base_content(const fastq_stats_vec& reads, const string_vec& names)
×
238
{
239
  json_list content;
×
240

241
  for (size_t i = 0; i < reads.size(); ++i) {
×
242
    const auto& stats = *reads.at(i);
×
243

244
    auto total_bases = stats.nucleotides_pos();
×
245

246
    for (const auto nucleotide : ACGTN::values) {
×
247
      const auto bases = stats.nucleotides_pos(nucleotide);
×
248

249
      const auto dict = content.dict();
×
250
      dict->str("read", names.at(i));
×
251
      dict->i64("offset", 1);
×
252
      dict->str("group", std::string(1, nucleotide));
×
253

254
      // Ensure that values get written, to prevent the plot being omitted
255
      dict->f64_vec("y", require_values(bases / total_bases));
×
256
    }
257

258
    {
×
259
      const auto gc_content = stats.nucleotides_gc_pos();
×
260
      auto dict = content.dict();
×
261
      dict->str("read", names.at(i));
×
262
      dict->i64("offset", 1);
×
263
      dict->str("group", "GC");
×
264

265
      // Ensure that values get written, to prevent the plot being omitted
266
      dict->f64_vec("y", require_values(gc_content / total_bases));
×
267
    }
268
  }
269

270
  return content.to_string();
×
271
}
272

273
////////////////////////////////////////////////////////////////////////////////
274
// Main sections
275

276
void
277
write_html_sampling_note(const userconfig& config,
×
278
                         const std::string& label,
279
                         const fastq_statistics& stats,
280
                         std::ostream& output)
281
{
282
  if (config.report_sample_rate < 1.0) {
×
283
    html_sampling_note()
×
284
      .set_label(label)
×
285
      .set_reads(format_rough_number((stats.number_of_sampled_reads())))
×
286
      .set_pct(format_percentage(stats.number_of_sampled_reads(),
×
287
                                 stats.number_of_input_reads()))
×
288
      .write(output);
×
289
  }
290
}
291

292
void
293
write_html_summary_section(const userconfig& config,
×
294
                           const statistics& stats,
295
                           std::ostream& output)
296
{
297
  html_head().set_title(config.report_title).write(output);
×
298

299
  html_body_start().set_title(config.report_title).write(output);
×
300

301
  // Basic information about the executable / call
302
  {
×
303
    html_summary()
×
304
      .set_date_and_time(userconfig::start_time)
×
305
      .set_version(VERSION)
×
306
      .set_command(shell_escape_command(config.args))
×
307
      .set_runtime(runtime_to_str(config.runtime()))
×
308
      .write(output);
×
309
  }
310

311
  fastq_statistics output_1;
×
312
  fastq_statistics output_2;
×
313
  fastq_statistics merged;
×
314
  fastq_statistics singleton;
×
315
  fastq_statistics discarded;
×
316

317
  for (const auto& it : stats.trimming) {
×
318
    output_1 += *it->read_1;
×
319
    output_2 += *it->read_2;
×
320
    merged += *it->merged;
×
321
    singleton += *it->singleton;
×
322
    discarded += *it->discarded;
×
323
  }
324

325
  if (config.paired_ended_mode) {
×
326
    // Summary statistics for input files
327
    {
×
328
      fastq_statistics totals;
×
329
      totals += *stats.input_1;
×
330
      totals += *stats.input_2;
×
331

332
      io_summary_writer summary(output, io_summary_writer::io::input);
×
333
      summary.write_head("Input", "summary-input");
×
334
      if (config.paired_ended_mode) {
×
335
        summary.write_row("Summary", totals);
×
336
        summary.write_row("File 1", *stats.input_1);
×
337
        summary.write_row("File 2", *stats.input_2);
×
338
      }
339
      summary.write_tail();
×
340

341
      write_html_sampling_note(config, "input", totals, output);
×
342
    }
343

344
    // Summary statistics for output files
345
    if (config.run_type != ar_command::report_only) {
×
346
      fastq_statistics totals;
×
347
      totals += output_1;
×
348
      totals += output_2;
×
349
      totals += merged;
×
350
      totals += singleton;
×
351
      // discarded reads not counted in the output
352
      // totals += discarded;
353

354
      io_summary_writer summary{ output, io_summary_writer::io::output };
×
355
      summary.write_head("Output", "summary-output");
×
356
      summary.write_row("Passed*", totals);
×
357
      if (config.paired_ended_mode) {
×
358
        summary.write_row("File 1", output_1);
×
359
        summary.write_row("File 2", output_2);
×
360

361
        if (config.is_read_merging_enabled()) {
×
362
          summary.write_row("Merged", merged);
×
363
        }
364

365
        if (config.is_any_filtering_enabled()) {
×
366
          summary.write_row("Singleton", singleton);
×
367
        }
368
      }
369

370
      if (config.is_any_filtering_enabled()) {
×
371
        summary.write_row("Discarded*", discarded);
×
372
      }
373
      summary.write_tail();
×
374

375
      write_html_sampling_note(config, "output", totals, output);
×
376

377
      // Note regarding passed / discarded reads
378
      html_output_footnote()
×
379
        .set_symbol("*")
×
380
        .set_html("The <b>Passed</b> column includes all read types except "
×
381
                  "for <b>Discarded</b> reads.")
382
        .write(output);
×
383
    }
384
  } else if (config.run_type == ar_command::report_only) {
×
385
    io_summary_writer summary{ output, io_summary_writer::io::input };
×
386
    summary.write_head("Input summary", "summary-input");
×
387
    summary.write_row("Input", *stats.input_1);
×
388
    summary.write_tail();
×
389

390
    write_html_sampling_note(config, "input", *stats.input_1, output);
×
391
  }
392

393
  else {
394
    io_summary_writer summary{ output, io_summary_writer::io::input };
×
395
    summary.write_head("Input/Output summary", "summary-input-output");
×
396
    summary.write_row("Input", *stats.input_1);
×
397
    summary.write_row("Output", output_1);
×
398
    if (config.is_any_filtering_enabled()) {
×
399
      summary.write_row("Discarded*", discarded);
×
400
    }
401
    summary.write_tail();
×
402

403
    fastq_statistics totals;
×
404
    totals += *stats.input_1;
×
405
    totals += output_1;
×
406

407
    write_html_sampling_note(config, "input/output", totals, output);
×
408

409
    if (config.is_any_filtering_enabled()) {
×
410
      // Note regarding discarded reads in output
411
      html_output_footnote()
×
412
        .set_symbol("*")
×
413
        .set_html("<b>Discarded</b> reads are not included in the "
×
414
                  "<b>Output</b> column.")
415
        .write(output);
×
416
    }
417
  }
418
}
419

420
//! Trimming statistics; describes a single row in the trimming summary table
421
struct trimming_stats
422
{
423
  //! ID of the step; rows belonging to the same step share the same ID
  size_t id;
424
  //! Processing stage; the stages used for rows built in this file are
  //! "Input", "Processing", "Filtering", and "Output"
425
  std::string stage;
426
  //! Row label 1 (step)
427
  std::string label_1;
428
  //! Row label 2 (sub-step)
429
  std::string label_2;
430
  //! Whether or not this step is enabled by command-line options
431
  bool enabled;
432
  //! Number of reads/bases trimmed/filtered
433
  reads_and_bases count;
434
};
435

436
void
437
write_html_trimming_stats(std::ostream& output,
×
438
                          const std::vector<trimming_stats>& stats,
439
                          const reads_and_bases& totals)
440
{
441
  size_t n_processing_steps = 0;
×
442
  size_t n_processing_steps_on = 0;
×
443
  size_t n_filtering_steps = 0;
×
444
  size_t n_filtering_steps_on = 0;
×
445

446
  size_t last_id = -1;
×
447
  size_t last_enabled = -1;
×
448
  for (const auto& it : stats) {
×
449
    if (it.id != last_id) {
×
450
      if (it.stage == "Processing") {
×
451
        n_processing_steps++;
×
452
      } else if (it.stage == "Filtering") {
×
453
        n_filtering_steps++;
×
454
      }
455

456
      last_id = it.id;
×
457
    }
458

459
    if (it.enabled && it.id != last_enabled) {
×
460
      if (it.stage == "Processing") {
×
461
        n_processing_steps_on++;
×
462
      } else if (it.stage == "Filtering") {
×
463
        n_filtering_steps_on++;
×
464
      }
465

466
      last_enabled = it.id;
×
467
    }
468
  }
469

470
  html_summary_trimming_head().write(output);
×
471

472
  std::string previous_stage;
×
473
  std::string previous_label_1;
×
474

475
  for (const auto& it : stats) {
×
476
    if (it.enabled) {
×
477
      const auto label_1 = it.label_1 == previous_label_1 ? "" : it.label_1;
×
478
      const auto stage = it.stage == previous_stage ? "" : it.stage;
×
479

480
      previous_stage = it.stage;
×
481
      previous_label_1 = it.label_1;
×
482

483
      html_summary_trimming_row()
×
484
        .set_stage(stage)
×
485
        .set_label_1(label_1)
×
486
        .set_label_2(it.label_2)
×
487
        .set_reads(format_rough_number(it.count.reads()))
×
488
        .set_pct_reads(format_percentage(it.count.reads(), totals.reads()))
×
489
        .set_bases(format_rough_number(it.count.bases()))
×
490
        .set_pct_bases(format_percentage(it.count.bases(), totals.bases()))
×
491
        .set_avg_bases(format_average_bases(it.count))
×
492
        .write(output);
×
493
    }
494
  }
495

496
  html_summary_trimming_tail()
×
497
    .set_n_enabled_filt(std::to_string(n_filtering_steps_on))
×
498
    .set_n_total_filt(std::to_string(n_filtering_steps))
×
499
    .set_n_enabled_proc(std::to_string(n_processing_steps_on))
×
500
    .set_n_total_proc(std::to_string(n_processing_steps))
×
501
    .write(output);
×
502
}
503

504
//! Filtering statistics; the number of reads/bases for a single filtering step
505
struct filtering_stats
506
{
507
  //! Filtering step
508
  std::string label;
509
  //! Whether or not this step is enabled by command-line options
510
  bool enabled;
511
  //! Number of reads/bases trimmed/filtered
512
  reads_and_bases count;
513
};
514

515
/**
 * Returns the number of input reads recorded in the statistics, together with
 * the product of the length distribution as the number of bases; the product
 * is required to be non-negative.
 */
reads_and_bases
summarize_input(const fastq_stats_ptr& ptr)
{
  const auto& stats = *ptr;
  const auto total_bases = stats.length_dist().product();
  AR_REQUIRE(total_bases >= 0);

  return reads_and_bases{ stats.number_of_input_reads(),
                          static_cast<uint64_t>(total_bases) };
}
524

525
void
526
build_polyx_trimming_rows(std::vector<trimming_stats>& out,
×
527
                          const std::string& polyx_nucleotides,
528
                          const indexed_count<ACGT>& reads,
529
                          const indexed_count<ACGT>& bases,
530
                          const size_t id)
531
{
532
  for (const auto nucleotide : ACGT::values) {
×
533
    out.push_back(
×
534
      { id,
535
        "Processing",
536
        "Poly-X tails",
537
        std::string(1, nucleotide),
538
        polyx_nucleotides.find(nucleotide) != std::string::npos,
×
539
        reads_and_bases(reads.get(nucleotide), bases.get(nucleotide)) });
×
540
  }
541

542
  out.push_back({ id,
×
543
                  "Processing",
544
                  "Poly-X tails",
545
                  "*",
546
                  polyx_nucleotides.size() > 1,
×
547
                  reads_and_bases(reads.sum(), bases.sum()) });
×
548
}
549

550
void
551
write_html_processing_section(const userconfig& config,
×
552
                              const statistics& stats,
553
                              std::ostream& output)
554
{
555
  trimming_statistics totals;
×
556
  for (const auto& it : stats.trimming) {
×
557
    totals += *it;
×
558
  }
559

560
  uint64_t adapter_reads = 0;
×
561
  uint64_t adapter_bases = 0;
×
562

563
  for (size_t i = 0; i < config.samples.adapters().size(); ++i) {
×
564
    adapter_reads += totals.adapter_trimmed_reads.get(i);
×
565
    adapter_bases += totals.adapter_trimmed_bases.get(i);
×
566
  }
567

568
  const auto total_input =
×
569
    summarize_input(stats.input_1) + summarize_input(stats.input_2);
×
570

571
  reads_and_bases total_output;
×
572
  for (const auto& it : stats.trimming) {
×
573
    total_output += summarize_input(it->read_1);
×
574
    total_output += summarize_input(it->read_2);
×
575
    total_output += summarize_input(it->singleton);
×
576
    total_output += summarize_input(it->merged);
×
577
  }
578

579
  // Trimming steps prior to adapter trimming
580
  size_t step_id = 0;
×
581
  std::vector<trimming_stats> trimming = {
×
582
    { step_id++, "Input", "Raw reads", "-", true, total_input },
×
583
    { step_id++,
×
584
      "Processing",
585
      "Terminal bases",
586
      "-",
587
      config.is_terminal_base_pre_trimming_enabled(),
×
588
      totals.terminal_pre_trimmed },
589
  };
590

591
  build_polyx_trimming_rows(trimming,
×
592
                            config.pre_trim_poly_x,
×
593
                            totals.poly_x_pre_trimmed_reads,
594
                            totals.poly_x_pre_trimmed_bases,
595
                            step_id++);
596

597
  trimming.push_back({ step_id++,
×
598
                       "Processing",
599
                       "Adapters",
600
                       "-",
601
                       config.is_adapter_trimming_enabled(),
×
602
                       reads_and_bases(adapter_reads, adapter_bases) });
603

604
  trimming.push_back({ step_id++,
×
605
                       "Processing",
606
                       "Merging",
607
                       "-",
608
                       config.is_read_merging_enabled(),
×
609
                       totals.reads_merged });
610

611
  trimming.push_back({ step_id++,
×
612
                       "Processing",
613
                       "Terminal bases",
614
                       "-",
615
                       config.is_terminal_base_post_trimming_enabled(),
×
616
                       totals.terminal_post_trimmed });
617

618
  build_polyx_trimming_rows(trimming,
×
619
                            config.post_trim_poly_x,
×
620
                            totals.poly_x_post_trimmed_reads,
621
                            totals.poly_x_post_trimmed_bases,
622
                            step_id++);
623

624
  trimming.push_back({ step_id++,
×
625
                       "Processing",
626
                       "Low quality bases",
627
                       "-",
628
                       config.is_low_quality_trimming_enabled(),
×
629
                       totals.low_quality_trimmed });
630

631
  trimming.push_back({ step_id++,
×
632
                       "Filtering",
633
                       "Short reads",
634
                       "-",
635
                       config.is_short_read_filtering_enabled(),
×
636
                       totals.filtered_min_length });
637

638
  trimming.push_back({ step_id++,
×
639
                       "Filtering",
640
                       "Long reads",
641
                       "-",
642
                       config.is_long_read_filtering_enabled(),
×
643
                       totals.filtered_max_length });
644
  trimming.push_back({ step_id++,
×
645
                       "Filtering",
646
                       "Ambiguous bases",
647
                       "-",
648
                       config.is_ambiguous_base_filtering_enabled(),
×
649
                       totals.filtered_ambiguous });
650
  trimming.push_back({ step_id++,
×
651
                       "Filtering",
652
                       "Mean quality",
653
                       "-",
654
                       config.is_mean_quality_filtering_enabled(),
×
655
                       totals.filtered_mean_quality });
656
  trimming.push_back({ step_id++,
×
657
                       "Filtering",
658
                       "Low complexity reads",
659
                       "-",
660
                       config.is_low_complexity_filtering_enabled(),
×
661
                       totals.filtered_low_complexity });
662

663
  trimming.push_back(
×
664
    { step_id++, "Output", "Filtered reads", "-", true, total_output });
×
665

666
  write_html_trimming_stats(output, trimming, total_input);
×
667
}
668

669
void
670
write_html_section_title(const std::string& title, std::ostream& output)
×
671
{
672
  html_h2_tag().set_title(title).set_href(to_lower(title)).write(output);
×
673
}
674

675
void
676
write_html_io_section(const userconfig& config,
×
677
                      std::ostream& output,
678
                      const std::string& title,
679
                      fastq_stats_vec statistics,
680
                      string_vec names,
681
                      const fastq_stats_ptr& merged = fastq_stats_ptr())
682
{
683
  AR_REQUIRE(statistics.size() == names.size());
×
684

685
  write_html_section_title(title, output);
×
686

687
  const char* dynamic_width =
×
688
    config.paired_ended_mode || merged ? FACET_WIDTH_2 : FACET_WIDTH_1;
×
689

690
  html_plot_title()
×
691
    .set_href(to_lower(title) + "-position-qualities")
×
692
    .set_title("Position quality distribution")
×
693
    .write(output);
×
694
  html_facet_line_plot()
×
695
    .set_x_axis(config.is_read_merging_enabled() && merged ? "null"
×
696
                                                           : "Position"_json)
697
    .set_y_axis("Phred score"_json)
×
698
    .set_width(dynamic_width)
×
699
    .set_values(build_base_qualities(statistics, names))
×
700
    .write(output);
×
701

702
  if (config.is_read_merging_enabled() && merged) {
×
703
    html_facet_line_plot()
×
704
      .set_x_axis("Position"_json)
×
705
      .set_y_axis("Phred score"_json)
×
706
      .set_width(FIGURE_WIDTH)
×
707
      .set_values(build_base_qualities({ merged }, { "Merged" }))
×
708
      .write(output);
×
709
  }
710

711
  html_plot_title()
×
712
    .set_href(to_lower(title) + "-nucleotide-content")
×
713
    .set_title("Nucleotide content")
×
714
    .write(output);
×
715
  html_facet_line_plot()
×
716
    .set_x_axis(config.is_read_merging_enabled() && merged ? "null"
×
717
                                                           : "Position"_json)
718
    .set_y_axis("Frequency"_json)
×
719
    .set_width(dynamic_width)
×
720
    .set_values(build_base_content(statistics, names))
×
721
    .write(output);
×
722

723
  if (config.is_read_merging_enabled() && merged) {
×
724
    html_facet_line_plot()
×
725
      .set_x_axis("Position"_json)
×
726
      .set_y_axis("Frequency"_json)
×
727
      .set_width(FIGURE_WIDTH)
×
728
      .set_values(build_base_content({ merged }, { "Merged" }))
×
729
      .write(output);
×
730

731
    // Subsequent plots should include merged reads
732
    names.push_back("Merged");
×
733
    statistics.push_back(merged);
×
734
  }
735

736
  html_plot_title()
×
737
    .set_href(to_lower(title) + "-quality-scores")
×
738
    .set_title("Quality score distribution")
×
739
    .write(output);
×
740
  html_frequency_plot()
×
741
    .set_x_axis("Phred score"_json)
×
742
    .set_y_axis("Frequency"_json)
×
743
    .set_width(FIGURE_WIDTH)
×
744
    .set_values(build_quality_distribution(statistics, names))
×
745
    .write(output);
×
746

747
  {
×
748
    json_list data;
×
749

750
    for (size_t i = 0; i < statistics.size(); ++i) {
×
751
      const auto m = data.dict();
×
752
      m->str("group", names.at(i));
×
753
      m->i64("offset", 0);
×
754
      m->f64_vec("y", statistics.at(i)->gc_content());
×
755
    }
756

757
    html_plot_title()
×
758
      .set_href(to_lower(title) + "-gc-content")
×
759
      .set_title("GC Content")
×
760
      .write(output);
×
761
    html_frequency_plot()
×
762
      .set_x_axis("%GC"_json)
×
763
      .set_y_axis("Frequency"_json)
×
764
      .set_width(FIGURE_WIDTH)
×
765
      .set_values(data.to_string())
×
766
      .write(output);
×
767
  }
768
}
769

770
void
771
write_html_input_section(const userconfig& config,
×
772
                         const statistics& stats,
773
                         std::ostream& output)
774
{
775
  fastq_stats_vec stats_vec = { stats.input_1 };
×
776
  string_vec names = { "File 1" };
×
777

778
  if (config.paired_ended_mode) {
×
779
    stats_vec.push_back(stats.input_2);
×
780
    names.emplace_back("File 2");
×
781
  }
782

NEW
783
  write_html_io_section(config,
×
784
                        output,
785
                        "Input",
NEW
786
                        std::move(stats_vec),
×
NEW
787
                        std::move(names));
×
788
}
789

790
void
791
write_html_analyses_section(const userconfig& config,
×
792
                            const statistics& stats,
793
                            std::ostream& output)
794

795
{
796
  write_html_section_title("Analyses", output);
×
797

798
  // Insert size distribution
799
  if (config.paired_ended_mode) {
×
800
    counts insert_sizes;
×
801
    for (const auto& it : stats.trimming) {
×
802
      insert_sizes += it->insert_sizes;
×
803
    }
804

805
    json_list samples;
×
806
    const auto sample = samples.dict();
×
807
    sample->str("group", "insert_sizes");
×
808
    sample->i64("offset", 0);
×
809
    sample->i64_vec("y", insert_sizes);
×
810

811
    // FIXME: Specify "identified reads" when in demultiplexing mode and
812
    // correct format_percentage to merged / n_identified.
813
    std::ostringstream ss;
×
814
    ss << "Insert sizes inferred for "
×
815
       << format_percentage(insert_sizes.sum(),
×
816
                            stats.input_1->number_of_input_reads())
×
817
       << " of reads";
×
818

819
    html_plot_title()
×
820
      .set_href("analyses-insert-sizes")
×
821
      .set_title("Insert-size distribution")
×
822
      .write(output);
×
823
    html_plot_sub_title().set_sub_title(ss.str()).write(output);
×
824
    html_frequency_plot()
×
825
      .set_x_axis("Insert size"_json)
×
826
      .set_y_axis("Frequency"_json)
×
827
      .set_legend("null")
×
828
      .set_width(FIGURE_WIDTH)
×
829
      .set_values(samples.to_string())
×
830
      .write(output);
×
831

832
    if (config.run_type == ar_command::report_only) {
×
833
      html_output_note()
×
834
        .set_text(
×
835
          "Insert size distribution inferred using adapter-free alignments.")
836
        .write(output);
×
837
    }
838
  }
839

840
  // Consensus adapter sequence inference
841
  if (config.paired_ended_mode && config.run_type == ar_command::report_only) {
×
842
    AR_REQUIRE(stats.adapter_id);
×
843

844
    const auto adapter_1 = stats.adapter_id->adapter1.summarize();
×
845
    const auto adapter_2 = stats.adapter_id->adapter2.summarize();
×
846

847
    // Consensus adapter sequences
848
    {
×
849
      const auto reference_adapters =
×
850
        config.samples.adapters().to_read_orientation().front();
×
851
      std::string reference_adapter_1{ reference_adapters.first };
×
852
      std::string reference_adapter_2{ reference_adapters.second };
×
853

854
      html_consensus_adapter_head()
×
855
        .set_overlapping_pairs(
×
856
          format_rough_number(stats.adapter_id->aligned_pairs))
×
857
        .set_pairs_with_adapters(
×
858
          format_rough_number(stats.adapter_id->pairs_with_adapters))
×
859
        .write(output);
×
860

861
      html_consensus_adapter_table()
×
862
        .set_name_1("--adapter1")
×
863
        .set_reference_1(reference_adapter_1)
×
864
        .set_alignment_1(adapter_1.compare_with(reference_adapter_1))
×
865
        .set_consensus_1(adapter_1.adapter().sequence())
×
866
        .set_qualities_1(adapter_1.adapter().qualities())
×
867
        .set_name_2("--adapter2")
×
868
        .set_reference_2(reference_adapter_2)
×
869
        .set_alignment_2(adapter_2.compare_with(reference_adapter_2))
×
870
        .set_consensus_2(adapter_2.adapter().sequence())
×
871
        .set_qualities_2(adapter_2.adapter().qualities())
×
872
        .write(output);
×
873
    }
874

875
    // Top N most common 5' kmers in adapter fragments
876
    {
×
877
      const auto& top_kmers_1 = adapter_1.top_kmers();
×
878
      const auto& top_kmers_2 = adapter_2.top_kmers();
×
879

880
      html_consensus_adapter_kmer_head()
×
881
        .set_n_kmers(std::to_string(consensus_adapter_stats::top_n_kmers))
×
882
        .set_kmer_length(std::to_string(consensus_adapter_stats::kmer_length))
×
883
        .write(output);
×
884

885
      const auto kmers = std::max(top_kmers_1.size(), top_kmers_2.size());
×
886
      for (size_t i = 0; i < kmers; ++i) {
×
887
        html_consensus_adapter_kmer_row row;
×
888
        row.set_index(std::to_string(i + 1));
×
889

890
        if (top_kmers_1.size() > i) {
×
891
          const auto& kmer = top_kmers_1.at(i);
×
892

893
          row.set_kmer_1(kmer.first)
×
894
            .set_count_1(format_rough_number(kmer.second))
×
895
            .set_pct_1(format_percentage(kmer.second, adapter_1.total_kmers()));
×
896
        } else {
897
          row.set_kmer_1("").set_count_1("").set_pct_1("");
×
898
        }
899

900
        if (top_kmers_2.size() > i) {
×
901
          const auto& kmer = top_kmers_2.at(i);
×
902

903
          row.set_kmer_2(kmer.first)
×
904
            .set_count_2(format_rough_number(kmer.second))
×
905
            .set_pct_2(format_percentage(kmer.second, adapter_2.total_kmers()));
×
906
        } else {
907
          row.set_kmer_2("").set_count_2("").set_pct_2("");
×
908
        }
909

910
        row.write(output);
×
911
      }
912

913
      html_consensus_adapter_kmer_tail().write(output);
×
914
    }
915
  }
916
}
917

918
std::pair<std::string, std::string>
919
join_barcodes(const sample& s)
×
920
{
921
  string_vec mate_1;
×
922
  string_vec mate_2;
×
923

924
  for (const auto& barcode : s) {
×
925
    mate_1.emplace_back(barcode.barcode_1);
×
926
    mate_2.emplace_back(barcode.barcode_2);
×
927
  }
928

929
  return {
×
930
    join_text(mate_1, "<br/>"),
×
931
    join_text(mate_2, "<br/>"),
×
932
  };
933
}
934

935
void
936
write_html_demultiplexing_section(const userconfig& config,
×
937
                                  const statistics& stats,
938
                                  std::ostream& output)
939

940
{
941
  write_html_section_title("Demultiplexing", output);
×
942

943
  json_list data;
×
944

945
  const size_t input_reads = stats.input_1->number_of_input_reads() +
×
946
                             stats.input_2->number_of_input_reads();
×
947

948
  for (size_t i = 0; i < config.samples.size(); ++i) {
×
949
    auto m = data.dict();
×
950
    m->str("x", config.samples.at(i).name());
×
951

952
    if (input_reads) {
×
953
      m->f64("y", (100.0 * stats.demultiplexing->samples.at(i)) / input_reads);
×
954
    } else {
955
      m->null("y");
×
956
    }
957
  }
958

959
  html_plot_title()
×
960
    .set_href("demux-samples")
×
961
    .set_title("Samples identified")
×
962
    .write(output);
×
963
  html_bar_plot()
×
964
    .set_x_axis("Samples"_json)
×
965
    .set_y_axis("Percent"_json)
×
966
    .set_width(FIGURE_WIDTH)
×
967
    .set_values(data.to_string())
×
968
    .write(output);
×
969

970
  html_demultiplexing_head().write(output);
×
971

972
  {
×
973
    const size_t unidentified = stats.demultiplexing->unidentified;
×
974

975
    fastq_statistics total;
×
976
    total += *stats.demultiplexing->unidentified_stats_1;
×
977
    total += *stats.demultiplexing->unidentified_stats_2;
×
978

979
    const auto output_reads = total.length_dist().sum();
×
980
    const auto output_bp = total.nucleotides_pos().sum();
×
981

982
    html_demultiplexing_row()
×
983
      .set_n("")
×
984
      .set_barcode_1("")
×
985
      .set_barcode_2("")
×
986
      .set_name("<b>Unidentified</b>")
×
987
      .set_pct(format_percentage(unidentified, input_reads, 2))
×
988
      .set_reads(format_rough_number(output_reads))
×
989
      .set_bp(format_rough_number(output_bp))
×
990
      .set_length(mean_of_bp_counts(total.length_dist()))
×
991
      .set_gc(format_percentage(total.nucleotides_gc_pos().sum(), output_bp))
×
992
      .write(output);
×
993
  }
994

995
  size_t sample_idx = 0;
×
996
  for (const auto& sample : config.samples) {
×
997
    const auto& sample_stats = *stats.trimming.at(sample_idx);
×
998

999
    fastq_statistics total;
×
1000

1001
    total += *sample_stats.read_1;
×
1002
    total += *sample_stats.read_2;
×
1003
    total += *sample_stats.merged;
×
1004
    total += *sample_stats.singleton;
×
1005
    // Not included in overview:
1006
    // total += *sample.discarded;
1007

1008
    const auto output_reads = total.length_dist().sum();
×
1009
    const auto output_bp = total.nucleotides_pos().sum();
×
1010
    const auto barcodes = join_barcodes(sample);
×
1011

1012
    html_demultiplexing_row()
×
1013
      .set_n(std::to_string(sample_idx + 1))
×
1014
      .set_barcode_1(barcodes.first)
×
1015
      .set_barcode_2(barcodes.second)
×
1016
      .set_name(sample.name())
×
NEW
1017
      .set_pct(format_percentage(stats.demultiplexing->samples.at(sample_idx),
×
1018
                                 input_reads,
1019
                                 2))
1020
      .set_reads(format_rough_number(output_reads))
×
1021
      .set_bp(format_rough_number(output_bp))
×
1022
      .set_length(mean_of_bp_counts(total.length_dist()))
×
1023
      .set_gc(format_percentage(total.nucleotides_gc_pos().sum(), output_bp))
×
1024
      .write(output);
×
1025
  }
1026

1027
  html_demultiplexing_tail().write(output);
×
1028
}
1029

1030
void
1031
write_html_output_section(const userconfig& config,
×
1032
                          const statistics& stats,
1033
                          std::ostream& output)
1034

1035
{
1036
  fastq_stats_vec stats_vec;
×
1037
  string_vec names;
×
1038

1039
  auto merged = std::make_shared<fastq_statistics>();
×
1040

1041
  {
×
1042
    auto output_1 = std::make_shared<fastq_statistics>();
×
1043
    auto output_2 = std::make_shared<fastq_statistics>();
×
1044
    auto singleton = std::make_shared<fastq_statistics>();
×
1045
    auto discarded = std::make_shared<fastq_statistics>();
×
1046

1047
    for (const auto& it : stats.trimming) {
×
1048
      *output_1 += *it->read_1;
×
1049
      *output_2 += *it->read_2;
×
1050
      *merged += *it->merged;
×
1051
      *singleton += *it->singleton;
×
1052
      *discarded += *it->discarded;
×
1053
    }
1054

1055
    stats_vec.push_back(output_1);
×
1056
    names.emplace_back("Output 1");
×
1057

1058
    if (config.paired_ended_mode) {
×
1059
      stats_vec.push_back(output_2);
×
1060
      names.emplace_back("Output 2");
×
1061

1062
      if (config.is_any_filtering_enabled()) {
×
1063
        stats_vec.push_back(singleton);
×
1064
        names.emplace_back("Singleton");
×
1065
      }
1066
    }
1067

1068
    if (config.is_any_filtering_enabled()) {
×
1069
      stats_vec.push_back(discarded);
×
1070
      names.emplace_back("Discarded");
×
1071
    }
1072
  }
1073

NEW
1074
  write_html_io_section(config,
×
1075
                        output,
1076
                        "Output",
NEW
1077
                        std::move(stats_vec),
×
NEW
1078
                        std::move(names),
×
1079
                        merged);
1080
}
1081

1082
} // namespace
1083

1084
////////////////////////////////////////////////////////////////////////////////
1085

1086
bool
1087
write_html_report(const userconfig& config,
×
1088
                  const statistics& stats,
1089
                  const std::string& filename)
1090
{
1091
  if (filename == DEV_NULL) {
×
1092
    // User disabled the report
1093
    return true;
1094
  }
1095

1096
  std::ostringstream output;
×
1097

1098
  write_html_summary_section(config, stats, output);
×
1099

1100
  if (config.run_type != ar_command::demultiplex_only &&
×
1101
      config.run_type != ar_command::report_only) {
1102
    write_html_processing_section(config, stats, output);
×
1103
  }
1104

1105
  write_html_input_section(config, stats, output);
×
1106

1107
  if (config.paired_ended_mode || config.run_type == ar_command::report_only) {
×
1108
    write_html_analyses_section(config, stats, output);
×
1109
  }
1110

1111
  if (config.is_demultiplexing_enabled()) {
×
1112
    write_html_demultiplexing_section(config, stats, output);
×
1113
  }
1114

1115
  if (config.run_type != ar_command::report_only) {
×
1116
    write_html_output_section(config, stats, output);
×
1117
  }
1118

1119
  html_body_end().write(output);
×
1120

1121
  try {
×
1122
    managed_writer writer{ filename };
×
1123
    writer.write(output.str());
×
1124
    writer.close();
×
1125
  } catch (const std::ios_base::failure& error) {
×
1126
    log::error() << "Error writing JSON report to '" << filename << "':\n"
×
1127
                 << indent_lines(error.what());
×
1128
    return false;
×
1129
  }
×
1130

1131
  return true;
×
1132
}
1133

1134
} // namespace adapterremoval
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc