• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #90

13 Apr 2025 11:30AM UTC coverage: 27.089% (+0.04%) from 27.054%
#90

push

travis-ci

web-flow
fixes to HTML barcode table (#117)

* fix barcode 1 being shown for barcode 2
* improve placement of barcode toggle

0 of 1 new or added line in 1 file covered. (0.0%)

32 existing lines in 1 file now uncovered.

2723 of 10052 relevant lines covered (27.09%)

4011.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/reports_html.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2022 Mikkel Schubert <mikkelsch@gmail.com>
3
#include "adapter_id.hpp"            // for adapter_id_statistics
4
#include "counts.hpp"                // for counts, indexed_count, counts_tmpl
5
#include "debug.hpp"                 // for AR_REQUIRE
6
#include "fastq.hpp"                 // for ACGT, ACGT::values, fastq, ACGTN
7
#include "json.hpp"                  // for json_dict, json_list, json_ptr
8
#include "logging.hpp"               // for log_stream, error
9
#include "main.hpp"                  // for VERSION, NAME
10
#include "managed_io.hpp"            // for managed_io
11
#include "output.hpp"                // for DEV_NULL, output_files
12
#include "reports.hpp"               // for write_html_report
13
#include "reports_template_html.hpp" // for html_frequency_plot, html_demultiple...
14
#include "sequence_sets.hpp"         // for adapter_set
15
#include "simd.hpp"                  // for size_t
16
#include "statistics.hpp"            // for fastq_stats_ptr, fastq_statistics
17
#include "strutils.hpp"              // for format_percentage, format_rough...
18
#include "userconfig.hpp"            // for userconfig, ar_command, DEV_NULL
19
#include <algorithm>                 // for max
20
#include <cctype>                    // for toupper
21
#include <cerrno>                    // for errno
22
#include <cmath>                     // for fmod
23
#include <cstdint>                   // for uint64_t
24
#include <cstring>                   // for size_t, strerror
25
#include <iomanip>                   // for operator<<, setprecision, setw
26
#include <memory>                    // for __shared_ptr_access, shared_ptr
27
#include <sstream>                   // for ostringstream
28
#include <string>                    // for string, operator==, to_string
29
#include <string_view>               // for string_view
30
#include <utility>                   // for pair
31
#include <vector>                    // for vector
32

33
namespace adapterremoval {
34

35
namespace {
36

37
using fastq_stats_vec = std::vector<fastq_stats_ptr>;
38
using template_ptr = std::unique_ptr<html_template>;
39

40
//! Width of full-size figures; chosen so that two pages fit side-by-side on a
//! 1920 width display
constexpr const char* FIGURE_WIDTH = "736";
//! Approximate width of each figure in a two-column facet plot
constexpr const char* FACET_WIDTH_2 = "351";
//! Width of each figure in a one-column facet plot; same as a full-size figure
constexpr const char* FACET_WIDTH_1 = FIGURE_WIDTH;
46

47
////////////////////////////////////////////////////////////////////////////////
48

49
/** Escapes a string that needs to be embedded in a JS */
50
std::string
51
json_encode(const std::string& s)
×
52
{
53
  return json_token::from_str(s)->to_string();
×
54
}
55

56
/** JSON escaped string */
57
std::string
58
operator""_json(const char* s, size_t length)
×
59
{
60
  return json_encode(std::string(s, length));
×
61
}
62

63
/**
 * Formats a duration as human readable text, for example
 * "1 hour,  2 minutes, and  3.5 seconds".
 *
 * Hours are only included for durations of at least one hour, and minutes are
 * only included for durations of at least one minute. Seconds are always
 * written, using one decimal place.
 *
 * @param seconds Duration in seconds.
 * @return The formatted duration.
 */
std::string
runtime_to_str(double seconds)
{
  std::ostringstream ss;

  if (seconds >= 3600.0) {
    const auto hours = static_cast<size_t>(seconds / 3600.0);

    // The field width set here applies to the minutes written below
    ss << hours << " " << (hours == 1 ? "hour, " : "hours, ") << std::setw(2);
  }

  if (seconds >= 60.0) {
    // Whole minutes remaining after removing whole hours; always 0 to 59
    const auto minutes =
      static_cast<size_t>(std::fmod(seconds, 3600.0) / 60.0);

    // Only exactly one minute is singular; the field width set here applies to
    // the seconds written below
    ss << minutes << " " << (minutes == 1 ? "minute" : "minutes") << ", and "
       << std::setw(4);
  }

  ss << std::fixed << std::setprecision(1) << std::fmod(seconds, 60.0)
     << " seconds";

  return ss.str();
}
85

86
std::string
87
mean_of_bp_counts(const counts& count)
×
88
{
89
  auto reads = count.sum();
×
90
  auto bases = count.product();
×
91

92
  if (!reads) {
×
93
    return "NA";
×
94
  }
95

96
  if (bases % reads == 0) {
×
97
    return std::to_string(bases / reads) + " bp";
×
98
  }
99

100
  std::ostringstream ss;
×
101
  ss << std::fixed << std::setprecision(1)
×
102
     << (bases / static_cast<double>(reads)) << " bp";
×
103

104
  return ss.str();
×
105
}
106

107
/**
108
 * VEGA-lite will omit plots if there are no values; this function therefore
109
 * ensures that at least one value is written for a given measurement.
110
 */
111
template<typename T>
112
counts_tmpl<T>
113
require_values(counts_tmpl<T> r, T fallback = T())
×
114
{
115
  if (r.size()) {
×
116
    return r;
×
117
  }
118

119
  return counts_tmpl<T>({ fallback });
×
120
}
121

122
std::string
123
format_average_bases(const reads_and_bases& counts)
×
124
{
125
  const auto reads = counts.reads();
×
126

127
  if (reads) {
×
128
    return format_fraction(counts.bases(), reads, 1) + " bp";
×
129
  } else {
130
    return "NA";
×
131
  }
132
}
133

134
////////////////////////////////////////////////////////////////////////////////
135

136
class io_summary_writer
137
{
138
public:
139
  enum class io
140
  {
141
    input,
142
    output
143
  };
144

145
  io_summary_writer(std::ostream& output, const io type)
×
146
    : m_output(output)
×
147
    , m_type(type)
×
148

149
  {
150
  }
151

152
  void write_head(const std::string& title, const std::string& href)
×
153
  {
154
    html_summary_io_head().set_title(title).set_href(href).write(m_output);
×
155
  }
156

157
  void write_row(const std::string& title, const fastq_statistics& stats)
×
158
  {
159
    const auto n_reads = (m_type == io::input) ? stats.number_of_input_reads()
×
160
                                               : stats.number_of_output_reads();
×
161
    const auto total = stats.quality_dist().sum();
×
162

163
    html_summary_io_row()
×
164
      .set_name(title)
×
165
      .set_n_reads(format_rough_number(n_reads))
×
166
      .set_n_bases(format_rough_number(stats.length_dist().product()))
×
167
      .set_lengths(mean_of_bp_counts(stats.length_dist()))
×
168
      .set_q30(format_percentage(stats.quality_dist().sum(30), total))
×
169
      .set_q20(format_percentage(stats.quality_dist().sum(20), total))
×
170
      .set_ns(format_percentage(stats.nucleotides_pos('N').sum(), total))
×
171
      .set_gc(format_percentage(stats.nucleotides_gc_pos().sum(), total))
×
172
      .write(m_output);
×
173
  }
174

175
  void write_tail() { html_summary_io_tail().write(m_output); }
×
176

177
private:
178
  std::ostream& m_output;
179
  io m_type;
180
};
181

182
std::string
183
build_base_qualities(const fastq_stats_vec& reads, const string_vec& names)
×
184
{
185
  json_list qualities;
×
186

187
  for (size_t i = 0; i < reads.size(); ++i) {
×
188
    const auto& stats = *reads.at(i);
×
189

190
    auto total_quality = stats.qualities_pos();
×
191
    auto total_bases = stats.nucleotides_pos();
×
192

193
    for (const auto nucleotide : ACGT::values) {
×
194
      const auto nucleotides = stats.nucleotides_pos(nucleotide);
×
195
      const auto quality = stats.qualities_pos(nucleotide);
×
196

197
      auto dict = qualities.dict();
×
198
      dict->str("read", names.at(i));
×
199
      dict->i64("offset", 1);
×
200
      dict->str("group", std::string(1, ::toupper(nucleotide)));
×
201
      dict->f64_vec("y", quality / nucleotides);
×
202
    }
203

204
    auto dict = qualities.dict();
×
205
    dict->str("read", names.at(i));
×
206
    dict->i64("offset", 1);
×
207
    dict->str("group", "Mean");
×
208

209
    // Ensure that values get written, to prevent the plot being omitted
210
    dict->f64_vec("y", require_values(total_quality / total_bases));
×
211
  }
212

213
  return qualities.to_string();
×
214
}
215

216
std::string
217
build_quality_distribution(const fastq_stats_vec& reads,
×
218
                           const string_vec& names)
219
{
220
  json_list data;
×
221

222
  for (size_t i = 0; i < reads.size(); ++i) {
×
223
    const auto& stats = reads.at(i);
×
224
    auto count = stats->quality_dist().trim();
×
225
    // A max that should give a uniform look to most data
226
    count.resize_up_to(44);
×
227

228
    const auto m = data.dict();
×
229
    m->str("group", names.at(i));
×
230
    m->i64("offset", 0);
×
231
    m->i64_vec("y", count);
×
232
  }
233

234
  return data.to_string();
×
235
}
236

237
std::string
238
build_base_content(const fastq_stats_vec& reads, const string_vec& names)
×
239
{
240
  json_list content;
×
241

242
  for (size_t i = 0; i < reads.size(); ++i) {
×
243
    const auto& stats = *reads.at(i);
×
244

245
    auto total_bases = stats.nucleotides_pos();
×
246

247
    for (const auto nucleotide : ACGTN::values) {
×
248
      const auto bases = stats.nucleotides_pos(nucleotide);
×
249

250
      const auto dict = content.dict();
×
251
      dict->str("read", names.at(i));
×
252
      dict->i64("offset", 1);
×
253
      dict->str("group", std::string(1, nucleotide));
×
254

255
      // Ensure that values get written, to prevent the plot being omitted
256
      dict->f64_vec("y", require_values(bases / total_bases));
×
257
    }
258

259
    {
×
260
      const auto gc_content = stats.nucleotides_gc_pos();
×
261
      auto dict = content.dict();
×
262
      dict->str("read", names.at(i));
×
263
      dict->i64("offset", 1);
×
264
      dict->str("group", "GC");
×
265

266
      // Ensure that values get written, to prevent the plot being omitted
267
      dict->f64_vec("y", require_values(gc_content / total_bases));
×
268
    }
269
  }
270

271
  return content.to_string();
×
272
}
273

274
////////////////////////////////////////////////////////////////////////////////
275
// Main sections
276

277
void
278
write_html_sampling_note(const userconfig& config,
×
279
                         const std::string& label,
280
                         const fastq_statistics& stats,
281
                         std::ostream& output)
282
{
283
  if (config.report_sample_rate < 1.0) {
×
284
    html_sampling_note()
×
285
      .set_label(label)
×
286
      .set_reads(format_rough_number((stats.number_of_sampled_reads())))
×
287
      .set_pct(format_percentage(stats.number_of_sampled_reads(),
×
288
                                 stats.number_of_input_reads()))
×
289
      .write(output);
×
290
  }
291
}
292

293
void
294
write_html_summary_section(const userconfig& config,
×
295
                           const statistics& stats,
296
                           std::ostream& output)
297
{
298
  html_head().set_title(config.report_title).write(output);
×
299

300
  html_body_start().set_title(config.report_title).write(output);
×
301

302
  // Basic information about the executable / call
303
  {
×
304
    html_summary()
×
305
      .set_date_and_time(userconfig::start_time)
×
306
      .set_version(VERSION)
×
307
      .set_command(shell_escape_command(config.args))
×
308
      .set_runtime(runtime_to_str(config.runtime()))
×
309
      .write(output);
×
310
  }
311

312
  fastq_statistics output_1;
×
313
  fastq_statistics output_2;
×
314
  fastq_statistics merged;
×
315
  fastq_statistics singleton;
×
316
  fastq_statistics discarded;
×
317

318
  for (const auto& it : stats.trimming) {
×
319
    output_1 += *it->read_1;
×
320
    output_2 += *it->read_2;
×
321
    merged += *it->merged;
×
322
    singleton += *it->singleton;
×
323
    discarded += *it->discarded;
×
324
  }
325

326
  if (config.paired_ended_mode) {
×
327
    // Summary statistics for input files
328
    {
×
329
      fastq_statistics totals;
×
330
      totals += *stats.input_1;
×
331
      totals += *stats.input_2;
×
332

333
      io_summary_writer summary(output, io_summary_writer::io::input);
×
334
      summary.write_head("Input", "summary-input");
×
335
      if (config.paired_ended_mode) {
×
336
        summary.write_row("Summary", totals);
×
337
        summary.write_row("File 1", *stats.input_1);
×
338
        summary.write_row("File 2", *stats.input_2);
×
339
      }
340
      summary.write_tail();
×
341

342
      write_html_sampling_note(config, "input", totals, output);
×
343
    }
344

345
    // Summary statistics for output files
346
    if (config.run_type != ar_command::report_only) {
×
347
      fastq_statistics totals;
×
348
      totals += output_1;
×
349
      totals += output_2;
×
350
      totals += merged;
×
351
      totals += singleton;
×
352
      // discarded reads not counted in the output
353
      // totals += discarded;
354

355
      io_summary_writer summary{ output, io_summary_writer::io::output };
×
356
      summary.write_head("Output", "summary-output");
×
357
      summary.write_row("Passed*", totals);
×
358
      if (config.paired_ended_mode) {
×
359
        summary.write_row("File 1", output_1);
×
360
        summary.write_row("File 2", output_2);
×
361

362
        if (config.is_read_merging_enabled()) {
×
363
          summary.write_row("Merged", merged);
×
364
        }
365

366
        if (config.is_any_filtering_enabled()) {
×
367
          summary.write_row("Singleton", singleton);
×
368
        }
369
      }
370

371
      if (config.is_any_filtering_enabled()) {
×
372
        summary.write_row("Discarded*", discarded);
×
373
      }
374
      summary.write_tail();
×
375

376
      write_html_sampling_note(config, "output", totals, output);
×
377

378
      // Note regarding passed / discarded reads
379
      html_output_footnote()
×
380
        .set_symbol("*")
×
381
        .set_html("The <b>Passed</b> column includes all read types except "
×
382
                  "for <b>Discarded</b> reads.")
383
        .write(output);
×
384
    }
385
  } else if (config.run_type == ar_command::report_only) {
×
386
    io_summary_writer summary{ output, io_summary_writer::io::input };
×
387
    summary.write_head("Input summary", "summary-input");
×
388
    summary.write_row("Input", *stats.input_1);
×
389
    summary.write_tail();
×
390

391
    write_html_sampling_note(config, "input", *stats.input_1, output);
×
392
  }
393

394
  else {
395
    io_summary_writer summary{ output, io_summary_writer::io::input };
×
396
    summary.write_head("Input/Output summary", "summary-input-output");
×
397
    summary.write_row("Input", *stats.input_1);
×
398
    summary.write_row("Output", output_1);
×
399
    if (config.is_any_filtering_enabled()) {
×
400
      summary.write_row("Discarded*", discarded);
×
401
    }
402
    summary.write_tail();
×
403

404
    fastq_statistics totals;
×
405
    totals += *stats.input_1;
×
406
    totals += output_1;
×
407

408
    write_html_sampling_note(config, "input/output", totals, output);
×
409

410
    if (config.is_any_filtering_enabled()) {
×
411
      // Note regarding discarded reads in output
412
      html_output_footnote()
×
413
        .set_symbol("*")
×
414
        .set_html("<b>Discarded</b> reads are not included in the "
×
415
                  "<b>Output</b> column.")
416
        .write(output);
×
417
    }
418
  }
419
}
420

421
//! Trimming statistics
422
struct trimming_stats
423
{
424
  size_t id;
425
  //! Processing stage relative to adapter trimming (pre, X, post)
426
  std::string stage;
427
  //! Row label 1 (step)
428
  std::string label_1;
429
  //! Row label 1 (sub-step)
430
  std::string label_2;
431
  //! Whether or not this step is enabled by command-line options
432
  bool enabled;
433
  //! Number of reads/bases trimmed/filtered
434
  reads_and_bases count;
435
};
436

437
void
438
write_html_trimming_stats(std::ostream& output,
×
439
                          const std::vector<trimming_stats>& stats,
440
                          const reads_and_bases& totals)
441
{
442
  size_t n_processing_steps = 0;
×
443
  size_t n_processing_steps_on = 0;
×
444
  size_t n_filtering_steps = 0;
×
445
  size_t n_filtering_steps_on = 0;
×
446

447
  size_t last_id = -1;
×
448
  size_t last_enabled = -1;
×
449
  for (const auto& it : stats) {
×
450
    if (it.id != last_id) {
×
451
      if (it.stage == "Processing") {
×
452
        n_processing_steps++;
×
453
      } else if (it.stage == "Filtering") {
×
454
        n_filtering_steps++;
×
455
      }
456

457
      last_id = it.id;
×
458
    }
459

460
    if (it.enabled && it.id != last_enabled) {
×
461
      if (it.stage == "Processing") {
×
462
        n_processing_steps_on++;
×
463
      } else if (it.stage == "Filtering") {
×
464
        n_filtering_steps_on++;
×
465
      }
466

467
      last_enabled = it.id;
×
468
    }
469
  }
470

471
  html_summary_trimming_head().write(output);
×
472

473
  std::string previous_stage;
×
474
  std::string previous_label_1;
×
475

476
  for (const auto& it : stats) {
×
477
    if (it.enabled) {
×
478
      const auto label_1 = it.label_1 == previous_label_1 ? "" : it.label_1;
×
479
      const auto stage = it.stage == previous_stage ? "" : it.stage;
×
480

481
      previous_stage = it.stage;
×
482
      previous_label_1 = it.label_1;
×
483

484
      html_summary_trimming_row()
×
485
        .set_stage(stage)
×
486
        .set_label_1(label_1)
×
487
        .set_label_2(it.label_2)
×
488
        .set_reads(format_rough_number(it.count.reads()))
×
489
        .set_pct_reads(format_percentage(it.count.reads(), totals.reads()))
×
490
        .set_bases(format_rough_number(it.count.bases()))
×
491
        .set_pct_bases(format_percentage(it.count.bases(), totals.bases()))
×
492
        .set_avg_bases(format_average_bases(it.count))
×
493
        .write(output);
×
494
    }
495
  }
496

497
  html_summary_trimming_tail()
×
498
    .set_n_enabled_filt(std::to_string(n_filtering_steps_on))
×
499
    .set_n_total_filt(std::to_string(n_filtering_steps))
×
500
    .set_n_enabled_proc(std::to_string(n_processing_steps_on))
×
501
    .set_n_total_proc(std::to_string(n_processing_steps))
×
502
    .write(output);
×
503
}
504

505
//! Filtering statistics
506
struct filtering_stats
507
{
508
  //! Filtering step
509
  std::string label;
510
  //! Whether or not this step is enabled by command-line options
511
  bool enabled;
512
  //! Number of reads/bases trimmed/filtered
513
  reads_and_bases count;
514
};
515

516
/**
 * Returns the number of input reads and the total number of bases, the latter
 * taken from the product of the length distribution, for the given statistics.
 */
reads_and_bases
summarize_input(const fastq_stats_ptr& ptr)
{
  const auto& stats = *ptr;

  const auto n_bases = stats.length_dist().product();
  // Required for the conversion to an unsigned value below
  AR_REQUIRE(n_bases >= 0);

  const auto n_reads = stats.number_of_input_reads();

  return reads_and_bases{ n_reads, static_cast<uint64_t>(n_bases) };
}
525

526
void
527
build_polyx_trimming_rows(std::vector<trimming_stats>& out,
×
528
                          const std::string& polyx_nucleotides,
529
                          const indexed_count<ACGT>& reads,
530
                          const indexed_count<ACGT>& bases,
531
                          const size_t id)
532
{
533
  for (const auto nucleotide : ACGT::values) {
×
534
    out.push_back(
×
535
      { id,
536
        "Processing",
537
        "Poly-X tails",
538
        std::string(1, nucleotide),
539
        polyx_nucleotides.find(nucleotide) != std::string::npos,
×
540
        reads_and_bases(reads.get(nucleotide), bases.get(nucleotide)) });
×
541
  }
542

543
  out.push_back({ id,
×
544
                  "Processing",
545
                  "Poly-X tails",
546
                  "*",
547
                  polyx_nucleotides.size() > 1,
×
548
                  reads_and_bases(reads.sum(), bases.sum()) });
×
549
}
550

551
void
552
write_html_processing_section(const userconfig& config,
×
553
                              const statistics& stats,
554
                              std::ostream& output)
555
{
556
  trimming_statistics totals;
×
557
  for (const auto& it : stats.trimming) {
×
558
    totals += *it;
×
559
  }
560

561
  uint64_t adapter_reads = 0;
×
562
  uint64_t adapter_bases = 0;
×
563

564
  for (size_t i = 0; i < config.samples.adapters().size(); ++i) {
×
565
    adapter_reads += totals.adapter_trimmed_reads.get(i);
×
566
    adapter_bases += totals.adapter_trimmed_bases.get(i);
×
567
  }
568

569
  const auto total_input =
×
570
    summarize_input(stats.input_1) + summarize_input(stats.input_2);
×
571

572
  reads_and_bases total_output;
×
573
  for (const auto& it : stats.trimming) {
×
574
    total_output += summarize_input(it->read_1);
×
575
    total_output += summarize_input(it->read_2);
×
576
    total_output += summarize_input(it->singleton);
×
577
    total_output += summarize_input(it->merged);
×
578
  }
579

580
  // Trimming steps prior to adapter trimming
581
  size_t step_id = 0;
×
582
  std::vector<trimming_stats> trimming = {
×
583
    { step_id++, "Input", "Raw reads", "-", true, total_input },
×
584
    { step_id++,
×
585
      "Processing",
586
      "Terminal bases",
587
      "-",
588
      config.is_terminal_base_pre_trimming_enabled(),
×
589
      totals.terminal_pre_trimmed },
590
  };
591

592
  build_polyx_trimming_rows(trimming,
×
593
                            config.pre_trim_poly_x,
×
594
                            totals.poly_x_pre_trimmed_reads,
595
                            totals.poly_x_pre_trimmed_bases,
596
                            step_id++);
597

598
  trimming.push_back({ step_id++,
×
599
                       "Processing",
600
                       "Adapters",
601
                       "-",
602
                       config.is_adapter_trimming_enabled(),
×
603
                       reads_and_bases(adapter_reads, adapter_bases) });
604

605
  trimming.push_back({ step_id++,
×
606
                       "Processing",
607
                       "Merging",
608
                       "-",
609
                       config.is_read_merging_enabled(),
×
610
                       totals.reads_merged });
611

612
  trimming.push_back({ step_id++,
×
613
                       "Processing",
614
                       "Terminal bases",
615
                       "-",
616
                       config.is_terminal_base_post_trimming_enabled(),
×
617
                       totals.terminal_post_trimmed });
618

619
  build_polyx_trimming_rows(trimming,
×
620
                            config.post_trim_poly_x,
×
621
                            totals.poly_x_post_trimmed_reads,
622
                            totals.poly_x_post_trimmed_bases,
623
                            step_id++);
624

625
  trimming.push_back({ step_id++,
×
626
                       "Processing",
627
                       "Low quality bases",
628
                       "-",
629
                       config.is_low_quality_trimming_enabled(),
×
630
                       totals.low_quality_trimmed });
631

632
  trimming.push_back({ step_id++,
×
633
                       "Filtering",
634
                       "Short reads",
635
                       "-",
636
                       config.is_short_read_filtering_enabled(),
×
637
                       totals.filtered_min_length });
638

639
  trimming.push_back({ step_id++,
×
640
                       "Filtering",
641
                       "Long reads",
642
                       "-",
643
                       config.is_long_read_filtering_enabled(),
×
644
                       totals.filtered_max_length });
645
  trimming.push_back({ step_id++,
×
646
                       "Filtering",
647
                       "Ambiguous bases",
648
                       "-",
649
                       config.is_ambiguous_base_filtering_enabled(),
×
650
                       totals.filtered_ambiguous });
651
  trimming.push_back({ step_id++,
×
652
                       "Filtering",
653
                       "Mean quality",
654
                       "-",
655
                       config.is_mean_quality_filtering_enabled(),
×
656
                       totals.filtered_mean_quality });
657
  trimming.push_back({ step_id++,
×
658
                       "Filtering",
659
                       "Low complexity reads",
660
                       "-",
661
                       config.is_low_complexity_filtering_enabled(),
×
662
                       totals.filtered_low_complexity });
663

664
  trimming.push_back(
×
665
    { step_id++, "Output", "Filtered reads", "-", true, total_output });
×
666

667
  write_html_trimming_stats(output, trimming, total_input);
×
668
}
669

670
void
671
write_html_section_title(const std::string& title, std::ostream& output)
×
672
{
673
  html_h2_tag().set_title(title).set_href(to_lower(title)).write(output);
×
674
}
675

676
void
677
write_html_io_section(const userconfig& config,
×
678
                      std::ostream& output,
679
                      const std::string& title,
680
                      fastq_stats_vec statistics,
681
                      string_vec names,
682
                      const fastq_stats_ptr& merged = fastq_stats_ptr())
683
{
684
  AR_REQUIRE(statistics.size() == names.size());
×
685

686
  write_html_section_title(title, output);
×
687

688
  const char* dynamic_width =
×
689
    config.paired_ended_mode || merged ? FACET_WIDTH_2 : FACET_WIDTH_1;
×
690

691
  html_plot_title()
×
692
    .set_href(to_lower(title) + "-position-qualities")
×
693
    .set_title("Position quality distribution")
×
694
    .write(output);
×
695
  html_facet_line_plot()
×
696
    .set_x_axis(config.is_read_merging_enabled() && merged ? "null"
×
697
                                                           : "Position"_json)
698
    .set_y_axis("Phred score"_json)
×
699
    .set_width(dynamic_width)
×
700
    .set_values(build_base_qualities(statistics, names))
×
701
    .write(output);
×
702

703
  if (config.is_read_merging_enabled() && merged) {
×
704
    html_facet_line_plot()
×
705
      .set_x_axis("Position"_json)
×
706
      .set_y_axis("Phred score"_json)
×
707
      .set_width(FIGURE_WIDTH)
×
708
      .set_values(build_base_qualities({ merged }, { "Merged" }))
×
709
      .write(output);
×
710
  }
711

712
  html_plot_title()
×
713
    .set_href(to_lower(title) + "-nucleotide-content")
×
714
    .set_title("Nucleotide content")
×
715
    .write(output);
×
716
  html_facet_line_plot()
×
717
    .set_x_axis(config.is_read_merging_enabled() && merged ? "null"
×
718
                                                           : "Position"_json)
719
    .set_y_axis("Frequency"_json)
×
720
    .set_width(dynamic_width)
×
721
    .set_values(build_base_content(statistics, names))
×
722
    .write(output);
×
723

724
  if (config.is_read_merging_enabled() && merged) {
×
725
    html_facet_line_plot()
×
726
      .set_x_axis("Position"_json)
×
727
      .set_y_axis("Frequency"_json)
×
728
      .set_width(FIGURE_WIDTH)
×
729
      .set_values(build_base_content({ merged }, { "Merged" }))
×
730
      .write(output);
×
731

732
    // Subsequent plots should include merged reads
733
    names.push_back("Merged");
×
734
    statistics.push_back(merged);
×
735
  }
736

737
  html_plot_title()
×
738
    .set_href(to_lower(title) + "-quality-scores")
×
739
    .set_title("Quality score distribution")
×
740
    .write(output);
×
741
  html_frequency_plot()
×
742
    .set_x_axis("Phred score"_json)
×
743
    .set_y_axis("Frequency"_json)
×
744
    .set_width(FIGURE_WIDTH)
×
745
    .set_values(build_quality_distribution(statistics, names))
×
746
    .write(output);
×
747

748
  {
×
749
    json_list data;
×
750

751
    for (size_t i = 0; i < statistics.size(); ++i) {
×
752
      const auto m = data.dict();
×
753
      m->str("group", names.at(i));
×
754
      m->i64("offset", 0);
×
755
      m->f64_vec("y", statistics.at(i)->gc_content());
×
756
    }
757

758
    html_plot_title()
×
759
      .set_href(to_lower(title) + "-gc-content")
×
760
      .set_title("GC Content")
×
761
      .write(output);
×
762
    html_frequency_plot()
×
763
      .set_x_axis("%GC"_json)
×
764
      .set_y_axis("Frequency"_json)
×
765
      .set_width(FIGURE_WIDTH)
×
766
      .set_values(data.to_string())
×
767
      .write(output);
×
768
  }
769
}
770

771
void
772
write_html_input_section(const userconfig& config,
×
773
                         const statistics& stats,
774
                         std::ostream& output)
775
{
776
  fastq_stats_vec stats_vec = { stats.input_1 };
×
777
  string_vec names = { "File 1" };
×
778

779
  if (config.paired_ended_mode) {
×
780
    stats_vec.push_back(stats.input_2);
×
781
    names.emplace_back("File 2");
×
782
  }
783

784
  write_html_io_section(config,
×
785
                        output,
786
                        "Input",
787
                        std::move(stats_vec),
788
                        std::move(names));
789
}
790

791
void
792
write_html_analyses_section(const userconfig& config,
×
793
                            const statistics& stats,
794
                            std::ostream& output)
795

796
{
797
  write_html_section_title("Analyses", output);
×
798

799
  // Insert size distribution
800
  if (config.paired_ended_mode) {
×
801
    counts insert_sizes;
×
802
    for (const auto& it : stats.trimming) {
×
803
      insert_sizes += it->insert_sizes;
×
804
    }
805

806
    json_list samples;
×
807
    const auto sample = samples.dict();
×
808
    sample->str("group", "insert_sizes");
×
809
    sample->i64("offset", 0);
×
810
    sample->i64_vec("y", insert_sizes);
×
811

812
    // FIXME: Specify "identified reads" when in demultiplexing mode and
813
    // correct format_percentage to merged / n_identified.
814
    std::ostringstream ss;
×
815
    ss << "Insert sizes inferred for "
×
816
       << format_percentage(insert_sizes.sum(),
×
817
                            stats.input_1->number_of_input_reads())
×
818
       << " of reads";
×
819

820
    html_plot_title()
×
821
      .set_href("analyses-insert-sizes")
×
822
      .set_title("Insert-size distribution")
×
823
      .write(output);
×
824
    html_plot_sub_title().set_sub_title(ss.str()).write(output);
×
825
    html_frequency_plot()
×
826
      .set_x_axis("Insert size"_json)
×
827
      .set_y_axis("Frequency"_json)
×
828
      .set_legend("null")
×
829
      .set_width(FIGURE_WIDTH)
×
830
      .set_values(samples.to_string())
×
831
      .write(output);
×
832

833
    if (config.run_type == ar_command::report_only) {
×
834
      html_output_note()
×
835
        .set_text(
×
836
          "Insert size distribution inferred using adapter-free alignments.")
837
        .write(output);
×
838
    }
839
  }
840

841
  if (config.report_duplication) {
×
842
    AR_REQUIRE(stats.duplication_1 && stats.duplication_2);
×
843
    const auto dupes_1 = stats.duplication_1->summarize();
×
844
    const auto dupes_2 = stats.duplication_2->summarize();
×
845
    const auto mean_uniq_frac = (dupes_1.unique_frac + dupes_2.unique_frac) / 2;
×
846

847
    const auto to_percent = [](double value) {
×
848
      std::ostringstream os;
×
849
      os << std::fixed << std::setprecision(1) << (value * 100.0) << " %";
×
850
      return os.str();
×
851
    };
852

853
    html_duplication_head().write(output);
×
854
    if (config.paired_ended_mode) {
×
855
      html_duplication_body_pe()
×
856
        .set_pct_unique(to_percent(mean_uniq_frac))
×
857
        .set_pct_unique_1(to_percent(dupes_1.unique_frac))
×
858
        .set_pct_unique_2(to_percent(dupes_2.unique_frac))
×
859
        .write(output);
×
860
    } else {
861
      html_duplication_body_se()
×
862
        .set_pct_unique(to_percent(dupes_1.unique_frac))
×
863
        .write(output);
×
864
    }
865

866
    const auto add_line = [](json_list& list,
×
867
                             std::string_view read,
868
                             std::string_view group,
869
                             const std::vector<std::string>& labels,
870
                             const rates& values) {
871
      AR_REQUIRE(labels.size() == values.size());
×
872
      for (size_t i = 0; i < labels.size(); ++i) {
×
873
        auto dict = list.dict();
×
874
        dict->str("read", read);
×
875
        dict->str("group", group);
×
876
        dict->str("x", labels.at(i));
×
877
        dict->f64("y", values.get(i));
×
878
      }
879
    };
880

881
    json_list data;
×
882
    const auto add_lines = [add_line, &data](const decltype(dupes_1)& s,
×
883
                                             std::string_view label) {
884
      add_line(data, label, "All", s.labels, s.total_sequences);
×
885
      add_line(data, label, "Unique", s.labels, s.unique_sequences);
×
886
    };
887

888
    add_lines(dupes_1, "File 1");
×
889
    if (config.paired_ended_mode) {
×
890
      add_lines(dupes_2, "File 2");
×
891
    }
892

893
    html_duplication_plot()
×
894
      .set_width(config.paired_ended_mode ? FACET_WIDTH_2 : FACET_WIDTH_1)
×
895
      .set_values(data.to_string())
×
896
      .write(output);
×
897
  }
898

899
  // Consensus adapter sequence inference
900
  if (config.paired_ended_mode && config.run_type == ar_command::report_only) {
×
901
    AR_REQUIRE(stats.adapter_id);
×
902

903
    const auto adapter_1 = stats.adapter_id->adapter1.summarize();
×
904
    const auto adapter_2 = stats.adapter_id->adapter2.summarize();
×
905

906
    // Consensus adapter sequences
907
    {
×
908
      const auto reference_adapters =
×
909
        config.samples.adapters().to_read_orientation().front();
×
910
      std::string reference_adapter_1{ reference_adapters.first };
×
911
      std::string reference_adapter_2{ reference_adapters.second };
×
912

913
      html_consensus_adapter_head()
×
914
        .set_overlapping_pairs(
×
915
          format_rough_number(stats.adapter_id->aligned_pairs))
×
916
        .set_pairs_with_adapters(
×
917
          format_rough_number(stats.adapter_id->pairs_with_adapters))
×
918
        .write(output);
×
919

920
      html_consensus_adapter_table()
×
921
        .set_name_1("--adapter1")
×
922
        .set_reference_1(reference_adapter_1)
×
923
        .set_alignment_1(adapter_1.compare_with(reference_adapter_1))
×
924
        .set_consensus_1(adapter_1.adapter().sequence())
×
925
        .set_qualities_1(adapter_1.adapter().qualities())
×
926
        .set_name_2("--adapter2")
×
927
        .set_reference_2(reference_adapter_2)
×
928
        .set_alignment_2(adapter_2.compare_with(reference_adapter_2))
×
929
        .set_consensus_2(adapter_2.adapter().sequence())
×
930
        .set_qualities_2(adapter_2.adapter().qualities())
×
931
        .write(output);
×
932
    }
933

934
    // Top N most common 5' kmers in adapter fragments
935
    {
×
936
      const auto& top_kmers_1 = adapter_1.top_kmers();
×
937
      const auto& top_kmers_2 = adapter_2.top_kmers();
×
938

939
      html_consensus_adapter_kmer_head()
×
940
        .set_n_kmers(std::to_string(consensus_adapter_stats::top_n_kmers))
×
941
        .set_kmer_length(std::to_string(consensus_adapter_stats::kmer_length))
×
942
        .write(output);
×
943

944
      const auto kmers = std::max(top_kmers_1.size(), top_kmers_2.size());
×
945
      for (size_t i = 0; i < kmers; ++i) {
×
946
        html_consensus_adapter_kmer_row row;
×
947
        row.set_index(std::to_string(i + 1));
×
948

949
        if (top_kmers_1.size() > i) {
×
950
          const auto& kmer = top_kmers_1.at(i);
×
951

952
          row.set_kmer_1(kmer.first)
×
953
            .set_count_1(format_rough_number(kmer.second))
×
954
            .set_pct_1(format_percentage(kmer.second, adapter_1.total_kmers()));
×
955
        }
956

957
        if (top_kmers_2.size() > i) {
×
958
          const auto& kmer = top_kmers_2.at(i);
×
959

960
          row.set_kmer_2(kmer.first)
×
961
            .set_count_2(format_rough_number(kmer.second))
×
962
            .set_pct_2(format_percentage(kmer.second, adapter_2.total_kmers()));
×
963
        }
964

965
        row.write(output);
×
966
      }
967

968
      html_consensus_adapter_kmer_tail().write(output);
×
969
    }
970
  }
971
}
972

973
void
974
write_html_demultiplexing_barplot(const userconfig& config,
×
975
                                  const statistics& stats,
976
                                  std::ostream& output)
977
{
978
  json_list data;
×
979

980
  const size_t input_reads = stats.input_1->number_of_input_reads() +
×
981
                             stats.input_2->number_of_input_reads();
×
982

983
  for (size_t i = 0; i < config.samples.size(); ++i) {
×
984
    const auto& sample = config.samples.at(i);
×
985

986
    for (size_t j = 0; j < sample.size(); ++j) {
×
987
      auto count = stats.demultiplexing->samples.at(i).get(j);
×
988

989
      const auto& sequences = sample.at(j);
×
990
      std::string key{ sequences.barcode_1 };
×
991
      if (!sequences.barcode_2.empty()) {
×
992
        key.push_back('-');
×
993
        key.append(sequences.barcode_2);
×
994
      }
995

996
      auto m = data.dict();
×
997
      m->i64("n", j + 1);
×
998
      m->str("barcodes", key);
×
999
      m->str("sample", sample.name());
×
1000

1001
      if (input_reads) {
×
1002
        m->f64("pct", (100.0 * count) / input_reads);
×
1003
      } else {
1004
        m->null("pct");
×
1005
      }
1006
    }
1007
  }
1008

1009
  html_plot_title()
×
1010
    .set_href("demux-samples")
×
1011
    .set_title("Samples identified")
×
1012
    .write(output);
×
1013
  html_bar_plot()
×
1014
    .set_x_axis("Samples"_json)
×
1015
    .set_y_axis("Percent"_json)
×
1016
    .set_width(FIGURE_WIDTH)
×
1017
    .set_values(data.to_string())
×
1018
    .write(output);
×
1019
}
1020

1021
void
1022
write_html_demultiplexing_table(const userconfig& config,
×
1023
                                const statistics& stats,
1024
                                std::ostream& output,
1025
                                const bool multiple_barcodes)
1026
{
1027
  const size_t input_reads = stats.input_1->number_of_input_reads() +
×
1028
                             stats.input_2->number_of_input_reads();
×
1029

1030
  html_demultiplexing_table_head().write(output);
×
1031

1032
  {
×
1033
    const size_t unidentified = stats.demultiplexing->unidentified;
×
1034

1035
    fastq_statistics total;
×
1036
    total += *stats.demultiplexing->unidentified_stats_1;
×
1037
    total += *stats.demultiplexing->unidentified_stats_2;
×
1038

1039
    const auto output_reads = total.length_dist().sum();
×
1040
    const auto output_bp = total.nucleotides_pos().sum();
×
1041

1042
    html_demultiplexing_row()
×
1043
      .set_name("<b>Unidentified</b>")
×
1044
      .set_sample_pct(format_percentage(unidentified, input_reads, 2))
×
1045
      .set_reads(format_rough_number(output_reads))
×
1046
      .set_bp(format_rough_number(output_bp))
×
1047
      .set_length(mean_of_bp_counts(total.length_dist()))
×
1048
      .set_gc(format_percentage(total.nucleotides_gc_pos().sum(), output_bp))
×
1049
      .write(output);
×
1050
  }
1051

1052
  size_t sample_idx = 0;
×
1053
  for (const auto& sample : config.samples) {
×
1054
    const auto& output_stats = *stats.trimming.at(sample_idx);
×
1055
    const auto& barcode_counts = stats.demultiplexing->samples.at(sample_idx);
×
1056
    const auto sample_reads = barcode_counts.sum();
×
1057

1058
    fastq_statistics total;
×
1059

1060
    total += *output_stats.read_1;
×
1061
    total += *output_stats.read_2;
×
1062
    total += *output_stats.merged;
×
1063
    total += *output_stats.singleton;
×
1064
    // Not included in overview:
1065
    // total += *sample.discarded;
1066

1067
    const auto output_reads = total.length_dist().sum();
×
1068
    const auto output_bp = total.nucleotides_pos().sum();
×
1069

1070
    html_demultiplexing_row row;
×
1071
    if (sample.size() < 2) {
×
1072
      row.set_barcode_1(std::string{ sample.at(0).barcode_1 })
×
1073
        .set_barcode_2(std::string{ sample.at(0).barcode_2 });
×
1074
    } else {
1075
      const auto cell =
×
1076
        "<i>(" + std::to_string(sample.size()) + " barcodes)</i>";
×
1077
      row.set_barcode_1(cell).set_barcode_2(cell);
×
1078
    }
1079

1080
    row.set_n(std::to_string(sample_idx + 1))
×
1081
      .set_name(sample.name())
×
1082
      .set_sample_pct(format_percentage(sample_reads, input_reads, 2))
×
1083
      .set_reads(format_rough_number(output_reads))
×
1084
      .set_bp(format_rough_number(output_bp))
×
1085
      .set_length(mean_of_bp_counts(total.length_dist()))
×
1086
      .set_gc(format_percentage(total.nucleotides_gc_pos().sum(), output_bp))
×
1087
      .write(output);
×
1088

1089
    if (sample.size() > 1) {
×
1090
      const auto total = barcode_counts.sum();
×
1091

1092
      for (size_t j = 0; j < sample.size(); j++) {
×
1093
        const auto& it = sample.at(j);
×
1094
        const auto count = barcode_counts.get(j);
×
1095

1096
        html_demultiplexing_barcode_row()
×
1097
          .set_barcode_1(std::string{ it.barcode_1 })
×
NEW
1098
          .set_barcode_2(std::string{ it.barcode_2 })
×
1099
          .set_barcode_pct_row(format_percentage(count, total, 2))
×
1100
          .write(output);
×
1101
      }
1102
    }
1103

1104
    ++sample_idx;
×
1105
  }
1106

1107
  html_demultiplexing_table_tail().write(output);
×
1108

1109
  if (multiple_barcodes) {
×
1110
    html_demultiplexing_toggle().write(output);
×
1111
  }
1112
}
1113

1114
void
1115
write_html_demultiplexing_section(const userconfig& config,
×
1116
                                  const statistics& stats,
1117
                                  std::ostream& output)
1118

1119
{
1120
  bool multiple_barcodes = false;
×
1121
  for (const auto& sample : config.samples) {
×
1122
    if (sample.size() > 1) {
×
1123
      multiple_barcodes = true;
1124
      break;
1125
    }
1126
  }
1127

1128
  write_html_section_title("Demultiplexing", output);
×
1129
  html_demultiplexing_head().write(output);
×
1130
  write_html_demultiplexing_barplot(config, stats, output);
×
1131
  write_html_demultiplexing_table(config, stats, output, multiple_barcodes);
×
1132
}
1133

1134
void
1135
write_html_output_section(const userconfig& config,
×
1136
                          const statistics& stats,
1137
                          std::ostream& output)
1138

1139
{
1140
  fastq_stats_vec stats_vec;
×
1141
  string_vec names;
×
1142

1143
  auto merged = std::make_shared<fastq_statistics>();
×
1144

1145
  {
×
1146
    auto output_1 = std::make_shared<fastq_statistics>();
×
1147
    auto output_2 = std::make_shared<fastq_statistics>();
×
1148
    auto singleton = std::make_shared<fastq_statistics>();
×
1149
    auto discarded = std::make_shared<fastq_statistics>();
×
1150

1151
    for (const auto& it : stats.trimming) {
×
1152
      *output_1 += *it->read_1;
×
1153
      *output_2 += *it->read_2;
×
1154
      *merged += *it->merged;
×
1155
      *singleton += *it->singleton;
×
1156
      *discarded += *it->discarded;
×
1157
    }
1158

1159
    stats_vec.push_back(output_1);
×
1160
    names.emplace_back("Output 1");
×
1161

1162
    if (config.paired_ended_mode) {
×
1163
      stats_vec.push_back(output_2);
×
1164
      names.emplace_back("Output 2");
×
1165

1166
      if (config.is_any_filtering_enabled()) {
×
1167
        stats_vec.push_back(singleton);
×
1168
        names.emplace_back("Singleton");
×
1169
      }
1170
    }
1171

1172
    if (config.is_any_filtering_enabled()) {
×
1173
      stats_vec.push_back(discarded);
×
1174
      names.emplace_back("Discarded");
×
1175
    }
1176
  }
1177

1178
  write_html_io_section(config,
×
1179
                        output,
1180
                        "Output",
1181
                        std::move(stats_vec),
1182
                        std::move(names),
1183
                        merged);
1184
}
1185

1186
} // namespace
1187

1188
////////////////////////////////////////////////////////////////////////////////
1189

1190
bool
1191
write_html_report(const userconfig& config,
×
1192
                  const statistics& stats,
1193
                  const std::string& filename)
1194
{
1195
  if (filename == DEV_NULL) {
×
1196
    // User disabled the report
1197
    return true;
1198
  }
1199

1200
  std::ostringstream output;
×
1201

1202
  write_html_summary_section(config, stats, output);
×
1203

1204
  if (config.run_type != ar_command::demultiplex_only &&
×
1205
      config.run_type != ar_command::report_only) {
1206
    write_html_processing_section(config, stats, output);
×
1207
  }
1208

1209
  write_html_input_section(config, stats, output);
×
1210

1211
  if (config.paired_ended_mode || config.report_duplication ||
×
1212
      config.run_type == ar_command::report_only) {
×
1213
    write_html_analyses_section(config, stats, output);
×
1214
  }
1215

1216
  if (config.is_demultiplexing_enabled()) {
×
1217
    write_html_demultiplexing_section(config, stats, output);
×
1218
  }
1219

1220
  if (config.run_type != ar_command::report_only) {
×
1221
    write_html_output_section(config, stats, output);
×
1222
  }
1223

1224
  html_body_end().write(output);
×
1225

1226
  try {
×
1227
    managed_writer writer{ filename };
×
1228
    writer.write(output.str());
×
1229
    writer.close();
×
1230
  } catch (const std::ios_base::failure& error) {
×
1231
    log::error() << "Error writing JSON report to '" << filename << "':\n"
×
1232
                 << indent_lines(error.what());
×
1233
    return false;
×
1234
  }
×
1235

1236
  return true;
×
1237
}
1238

1239
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc