• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #103

18 Apr 2025 01:56PM UTC coverage: 66.959% (-0.2%) from 67.126%
#103

push

travis-ci

web-flow
include barcode orientation in HTML/JSON reports (#129)

This implements part of #68

0 of 48 new or added lines in 2 files covered. (0.0%)

3 existing lines in 2 files now uncovered.

9697 of 14482 relevant lines covered (66.96%)

3053.41 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/reports_html.cpp
1
// SPDX-License-Identifier: GPL-3.0-or-later
2
// SPDX-FileCopyrightText: 2022 Mikkel Schubert <mikkelsch@gmail.com>
3
#include "adapter_id.hpp"            // for adapter_id_statistics
4
#include "counts.hpp"                // for counts, indexed_count, counts_tmpl
5
#include "debug.hpp"                 // for AR_REQUIRE
6
#include "fastq.hpp"                 // for ACGT, ACGT::values, fastq, ACGTN
7
#include "json.hpp"                  // for json_dict, json_list, json_ptr
8
#include "logging.hpp"               // for log_stream, error
9
#include "main.hpp"                  // for VERSION, NAME
10
#include "managed_io.hpp"            // for managed_io
11
#include "output.hpp"                // for DEV_NULL, output_files
12
#include "reports.hpp"               // for write_html_report
13
#include "reports_template_html.hpp" // for html_frequency_plot, html_demultiple...
14
#include "sequence_sets.hpp"         // for adapter_set
15
#include "simd.hpp"                  // for size_t
16
#include "statistics.hpp"            // for fastq_stats_ptr, fastq_statistics
17
#include "strutils.hpp"              // for format_percentage, format_rough...
18
#include "userconfig.hpp"            // for userconfig, ar_command, DEV_NULL
19
#include <algorithm>                 // for max
20
#include <cctype>                    // for toupper
21
#include <cerrno>                    // for errno
22
#include <cmath>                     // for fmod
23
#include <cstdint>                   // for uint64_t
24
#include <cstring>                   // for size_t, strerror
25
#include <iomanip>                   // for operator<<, setprecision, setw
26
#include <memory>                    // for __shared_ptr_access, shared_ptr
27
#include <sstream>                   // for ostringstream
28
#include <string>                    // for string, operator==, to_string
29
#include <string_view>               // for string_view
30
#include <utility>                   // for pair
31
#include <vector>                    // for vector
32

33
namespace adapterremoval {
34

35
namespace {
36

37
using fastq_stats_vec = std::vector<fastq_stats_ptr>;
38
using template_ptr = std::unique_ptr<html_template>;
39

40
//! Size chosen to allow fitting two pages side-by-side on a 1920 width display
41
const char* const FIGURE_WIDTH = "736";
42
//! Per figure width for two-column facet figures; approximate
43
const char* const FACET_WIDTH_2 = "351";
44
//! Per figure width for one-column facet figures; approximate
45
const char* const FACET_WIDTH_1 = FIGURE_WIDTH;
46

47
////////////////////////////////////////////////////////////////////////////////
48

49
/** Escapes a string that needs to be embedded in a JS */
50
std::string
51
json_encode(const std::string& s)
×
52
{
53
  return json_token::from_str(s)->to_string();
×
54
}
55

56
/** JSON escaped string */
57
std::string
58
operator""_json(const char* s, size_t length)
×
59
{
60
  return json_encode(std::string(s, length));
×
61
}
62

63
/**
 * Formats a duration as human readable text, e.g. "5.0 seconds",
 * "2 minutes, and  5.0 seconds", or "1 hour,  0 minutes, and  0.0 seconds".
 *
 * Hours and minutes are only written once the duration reaches 1 hour and 1
 * minute, respectively. When preceded by a larger unit, minutes are padded to
 * width 2 and seconds to width 4.
 *
 * @param seconds Duration in seconds.
 * @return The formatted duration; seconds are written with one decimal.
 */
std::string
runtime_to_str(double seconds)
{
  std::ostringstream ss;

  if (seconds >= 3600.0) {
    // The trailing setw(2) pads the minutes written by the block below
    ss << static_cast<size_t>(seconds / 3600.0) << " "
       << (seconds >= 7200.0 ? "hours, " : "hour, ") << std::setw(2);
  }

  if (seconds >= 60.0) {
    // Minutes within the current hour, i.e. always in the range 0 to 59
    const auto minutes = static_cast<size_t>(std::fmod(seconds, 3600.0) / 60.0);

    // Only exactly one minute is singular; the trailing setw(4) pads the
    // seconds written below
    ss << minutes << " " << (minutes == 1 ? "minute" : "minutes") << ", and "
       << std::setw(4);
  }

  ss << std::fixed << std::setprecision(1) << std::fmod(seconds, 60.0)
     << " seconds";

  return ss.str();
}
85

86
std::string
87
mean_of_bp_counts(const counts& count)
×
88
{
89
  auto reads = count.sum();
×
90
  auto bases = count.product();
×
91

92
  if (!reads) {
×
93
    return "NA";
×
94
  }
95

96
  if (bases % reads == 0) {
×
97
    return std::to_string(bases / reads) + " bp";
×
98
  }
99

100
  std::ostringstream ss;
×
101
  ss << std::fixed << std::setprecision(1)
×
102
     << (bases / static_cast<double>(reads)) << " bp";
×
103

104
  return ss.str();
×
105
}
106

107
/**
108
 * VEGA-lite will omit plots if there are no values; this function therefore
109
 * ensures that at least one value is written for a given measurement.
110
 */
111
template<typename T>
112
counts_tmpl<T>
113
require_values(counts_tmpl<T> r, T fallback = T())
×
114
{
115
  if (r.size()) {
×
116
    return r;
×
117
  }
118

119
  return counts_tmpl<T>({ fallback });
×
120
}
121

122
std::string
123
format_average_bases(const reads_and_bases& counts)
×
124
{
125
  const auto reads = counts.reads();
×
126

127
  if (reads) {
×
128
    return format_fraction(counts.bases(), reads, 1) + " bp";
×
129
  } else {
130
    return "NA";
×
131
  }
132
}
133

134
std::string
NEW
135
orientation_to_label(const sample_sequences& it)
×
136
{
NEW
137
  switch (it.orientation) {
×
NEW
138
    case barcode_orientation::unspecified:
×
NEW
139
      return {};
×
NEW
140
    case barcode_orientation::forward:
×
NEW
141
      return "+";
×
NEW
142
    case barcode_orientation::reverse:
×
NEW
143
      return "-";
×
NEW
144
    default:
×
NEW
145
      AR_FAIL("invalid barcode orientation");
×
146
  }
147
}
148

149
////////////////////////////////////////////////////////////////////////////////
150

151
class io_summary_writer
152
{
153
public:
154
  enum class io
155
  {
156
    input,
157
    output
158
  };
159

160
  io_summary_writer(std::ostream& output, const io type)
×
161
    : m_output(output)
×
162
    , m_type(type)
×
163

164
  {
165
  }
166

167
  void write_head(const std::string& title, const std::string& href)
×
168
  {
169
    html_summary_io_head().set_title(title).set_href(href).write(m_output);
×
170
  }
171

172
  void write_row(const std::string& title, const fastq_statistics& stats)
×
173
  {
174
    const auto n_reads = (m_type == io::input) ? stats.number_of_input_reads()
×
175
                                               : stats.number_of_output_reads();
×
176
    const auto total = stats.quality_dist().sum();
×
177

178
    html_summary_io_row()
×
179
      .set_name(title)
×
180
      .set_n_reads(format_rough_number(n_reads))
×
181
      .set_n_bases(format_rough_number(stats.length_dist().product()))
×
182
      .set_lengths(mean_of_bp_counts(stats.length_dist()))
×
183
      .set_q30(format_percentage(stats.quality_dist().sum(30), total))
×
184
      .set_q20(format_percentage(stats.quality_dist().sum(20), total))
×
185
      .set_ns(format_percentage(stats.nucleotides_pos('N').sum(), total))
×
186
      .set_gc(format_percentage(stats.nucleotides_gc_pos().sum(), total))
×
187
      .write(m_output);
×
188
  }
189

190
  void write_tail() { html_summary_io_tail().write(m_output); }
×
191

192
private:
193
  std::ostream& m_output;
194
  io m_type;
195
};
196

197
std::string
198
build_base_qualities(const fastq_stats_vec& reads, const string_vec& names)
×
199
{
200
  json_list qualities;
×
201

202
  for (size_t i = 0; i < reads.size(); ++i) {
×
203
    const auto& stats = *reads.at(i);
×
204

205
    auto total_quality = stats.qualities_pos();
×
206
    auto total_bases = stats.nucleotides_pos();
×
207

208
    for (const auto nucleotide : ACGT::values) {
×
209
      const auto nucleotides = stats.nucleotides_pos(nucleotide);
×
210
      const auto quality = stats.qualities_pos(nucleotide);
×
211

212
      auto dict = qualities.dict();
×
213
      dict->str("read", names.at(i));
×
214
      dict->i64("offset", 1);
×
215
      dict->str("group", std::string(1, ::toupper(nucleotide)));
×
216
      dict->f64_vec("y", quality / nucleotides);
×
217
    }
218

219
    auto dict = qualities.dict();
×
220
    dict->str("read", names.at(i));
×
221
    dict->i64("offset", 1);
×
222
    dict->str("group", "Mean");
×
223

224
    // Ensure that values get written, to prevent the plot being omitted
225
    dict->f64_vec("y", require_values(total_quality / total_bases));
×
226
  }
227

228
  return qualities.to_string();
×
229
}
230

231
std::string
232
build_quality_distribution(const fastq_stats_vec& reads,
×
233
                           const string_vec& names)
234
{
235
  json_list data;
×
236

237
  for (size_t i = 0; i < reads.size(); ++i) {
×
238
    const auto& stats = reads.at(i);
×
239
    auto count = stats->quality_dist().trim();
×
240
    // A max that should give a uniform look to most data
241
    count.resize_up_to(44);
×
242

243
    const auto m = data.dict();
×
244
    m->str("group", names.at(i));
×
245
    m->i64("offset", 0);
×
246
    m->i64_vec("y", count);
×
247
  }
248

249
  return data.to_string();
×
250
}
251

252
std::string
253
build_base_content(const fastq_stats_vec& reads, const string_vec& names)
×
254
{
255
  json_list content;
×
256

257
  for (size_t i = 0; i < reads.size(); ++i) {
×
258
    const auto& stats = *reads.at(i);
×
259

260
    auto total_bases = stats.nucleotides_pos();
×
261

262
    for (const auto nucleotide : ACGTN::values) {
×
263
      const auto bases = stats.nucleotides_pos(nucleotide);
×
264

265
      const auto dict = content.dict();
×
266
      dict->str("read", names.at(i));
×
267
      dict->i64("offset", 1);
×
268
      dict->str("group", std::string(1, nucleotide));
×
269

270
      // Ensure that values get written, to prevent the plot being omitted
271
      dict->f64_vec("y", require_values(bases / total_bases));
×
272
    }
273

274
    {
×
275
      const auto gc_content = stats.nucleotides_gc_pos();
×
276
      auto dict = content.dict();
×
277
      dict->str("read", names.at(i));
×
278
      dict->i64("offset", 1);
×
279
      dict->str("group", "GC");
×
280

281
      // Ensure that values get written, to prevent the plot being omitted
282
      dict->f64_vec("y", require_values(gc_content / total_bases));
×
283
    }
284
  }
285

286
  return content.to_string();
×
287
}
288

289
////////////////////////////////////////////////////////////////////////////////
290
// Main sections
291

292
void
293
write_html_sampling_note(const userconfig& config,
×
294
                         const std::string& label,
295
                         const fastq_statistics& stats,
296
                         std::ostream& output)
297
{
298
  if (config.report_sample_rate < 1.0) {
×
299
    html_sampling_note()
×
300
      .set_label(label)
×
301
      .set_reads(format_rough_number((stats.number_of_sampled_reads())))
×
302
      .set_pct(format_percentage(stats.number_of_sampled_reads(),
×
303
                                 stats.number_of_input_reads()))
×
304
      .write(output);
×
305
  }
306
}
307

308
void
309
write_html_summary_section(const userconfig& config,
×
310
                           const statistics& stats,
311
                           std::ostream& output)
312
{
313
  html_head().set_title(config.report_title).write(output);
×
314

315
  html_body_start().set_title(config.report_title).write(output);
×
316

317
  // Basic information about the executable / call
318
  {
×
319
    html_summary()
×
320
      .set_date_and_time(userconfig::start_time)
×
321
      .set_version(VERSION)
×
322
      .set_command(shell_escape_command(config.args))
×
323
      .set_runtime(runtime_to_str(config.runtime()))
×
324
      .write(output);
×
325
  }
326

327
  fastq_statistics output_1;
×
328
  fastq_statistics output_2;
×
329
  fastq_statistics merged;
×
330
  fastq_statistics singleton;
×
331
  fastq_statistics discarded;
×
332

333
  for (const auto& it : stats.trimming) {
×
334
    output_1 += *it->read_1;
×
335
    output_2 += *it->read_2;
×
336
    merged += *it->merged;
×
337
    singleton += *it->singleton;
×
338
    discarded += *it->discarded;
×
339
  }
340

341
  if (config.paired_ended_mode) {
×
342
    // Summary statistics for input files
343
    {
×
344
      fastq_statistics totals;
×
345
      totals += *stats.input_1;
×
346
      totals += *stats.input_2;
×
347

348
      io_summary_writer summary(output, io_summary_writer::io::input);
×
349
      summary.write_head("Input", "summary-input");
×
350
      if (config.paired_ended_mode) {
×
351
        summary.write_row("Summary", totals);
×
352
        summary.write_row("File 1", *stats.input_1);
×
353
        summary.write_row("File 2", *stats.input_2);
×
354
      }
355
      summary.write_tail();
×
356

357
      write_html_sampling_note(config, "input", totals, output);
×
358
    }
359

360
    // Summary statistics for output files
361
    if (config.run_type != ar_command::report_only) {
×
362
      fastq_statistics totals;
×
363
      totals += output_1;
×
364
      totals += output_2;
×
365
      totals += merged;
×
366
      totals += singleton;
×
367
      // discarded reads not counted in the output
368
      // totals += discarded;
369

370
      io_summary_writer summary{ output, io_summary_writer::io::output };
×
371
      summary.write_head("Output", "summary-output");
×
372
      summary.write_row("Passed*", totals);
×
373
      if (config.paired_ended_mode) {
×
374
        summary.write_row("File 1", output_1);
×
375
        summary.write_row("File 2", output_2);
×
376

377
        if (config.is_read_merging_enabled()) {
×
378
          summary.write_row("Merged", merged);
×
379
        }
380

381
        if (config.is_any_filtering_enabled()) {
×
382
          summary.write_row("Singleton", singleton);
×
383
        }
384
      }
385

386
      if (config.is_any_filtering_enabled()) {
×
387
        summary.write_row("Discarded*", discarded);
×
388
      }
389
      summary.write_tail();
×
390

391
      write_html_sampling_note(config, "output", totals, output);
×
392

393
      // Note regarding passed / discarded reads
394
      html_output_footnote()
×
395
        .set_symbol("*")
×
396
        .set_html("The <b>Passed</b> column includes all read types except "
×
397
                  "for <b>Discarded</b> reads.")
398
        .write(output);
×
399
    }
400
  } else if (config.run_type == ar_command::report_only) {
×
401
    io_summary_writer summary{ output, io_summary_writer::io::input };
×
402
    summary.write_head("Input summary", "summary-input");
×
403
    summary.write_row("Input", *stats.input_1);
×
404
    summary.write_tail();
×
405

406
    write_html_sampling_note(config, "input", *stats.input_1, output);
×
407
  }
408

409
  else {
410
    io_summary_writer summary{ output, io_summary_writer::io::input };
×
411
    summary.write_head("Input/Output summary", "summary-input-output");
×
412
    summary.write_row("Input", *stats.input_1);
×
413
    summary.write_row("Output", output_1);
×
414
    if (config.is_any_filtering_enabled()) {
×
415
      summary.write_row("Discarded*", discarded);
×
416
    }
417
    summary.write_tail();
×
418

419
    fastq_statistics totals;
×
420
    totals += *stats.input_1;
×
421
    totals += output_1;
×
422

423
    write_html_sampling_note(config, "input/output", totals, output);
×
424

425
    if (config.is_any_filtering_enabled()) {
×
426
      // Note regarding discarded reads in output
427
      html_output_footnote()
×
428
        .set_symbol("*")
×
429
        .set_html("<b>Discarded</b> reads are not included in the "
×
430
                  "<b>Output</b> column.")
431
        .write(output);
×
432
    }
433
  }
434
}
435

436
//! A single row in the table of trimming/filtering statistics
437
struct trimming_stats
438
{
439
  //! Identifies the step a row belongs to; consecutive rows sharing an ID are
  //! counted as a single step when summarizing enabled/total steps
  size_t id;
440
  //! Processing stage; the rows built in this file use "Input", "Processing",
  //! "Filtering", or "Output"
441
  std::string stage;
442
  //! Row label 1 (step)
443
  std::string label_1;
444
  //! Row label 2 (sub-step)
445
  std::string label_2;
446
  //! Whether or not this step is enabled by command-line options
447
  bool enabled;
448
  //! Number of reads/bases trimmed/filtered
449
  reads_and_bases count;
450
};
451

452
void
453
write_html_trimming_stats(std::ostream& output,
×
454
                          const std::vector<trimming_stats>& stats,
455
                          const reads_and_bases& totals)
456
{
457
  size_t n_processing_steps = 0;
×
458
  size_t n_processing_steps_on = 0;
×
459
  size_t n_filtering_steps = 0;
×
460
  size_t n_filtering_steps_on = 0;
×
461

462
  size_t last_id = -1;
×
463
  size_t last_enabled = -1;
×
464
  for (const auto& it : stats) {
×
465
    if (it.id != last_id) {
×
466
      if (it.stage == "Processing") {
×
467
        n_processing_steps++;
×
468
      } else if (it.stage == "Filtering") {
×
469
        n_filtering_steps++;
×
470
      }
471

472
      last_id = it.id;
×
473
    }
474

475
    if (it.enabled && it.id != last_enabled) {
×
476
      if (it.stage == "Processing") {
×
477
        n_processing_steps_on++;
×
478
      } else if (it.stage == "Filtering") {
×
479
        n_filtering_steps_on++;
×
480
      }
481

482
      last_enabled = it.id;
×
483
    }
484
  }
485

486
  html_summary_trimming_head().write(output);
×
487

488
  std::string previous_stage;
×
489
  std::string previous_label_1;
×
490

491
  for (const auto& it : stats) {
×
492
    if (it.enabled) {
×
493
      const auto label_1 = it.label_1 == previous_label_1 ? "" : it.label_1;
×
494
      const auto stage = it.stage == previous_stage ? "" : it.stage;
×
495

496
      previous_stage = it.stage;
×
497
      previous_label_1 = it.label_1;
×
498

499
      html_summary_trimming_row()
×
500
        .set_stage(stage)
×
501
        .set_label_1(label_1)
×
502
        .set_label_2(it.label_2)
×
503
        .set_reads(format_rough_number(it.count.reads()))
×
504
        .set_pct_reads(format_percentage(it.count.reads(), totals.reads()))
×
505
        .set_bases(format_rough_number(it.count.bases()))
×
506
        .set_pct_bases(format_percentage(it.count.bases(), totals.bases()))
×
507
        .set_avg_bases(format_average_bases(it.count))
×
508
        .write(output);
×
509
    }
510
  }
511

512
  html_summary_trimming_tail()
×
513
    .set_n_enabled_filt(std::to_string(n_filtering_steps_on))
×
514
    .set_n_total_filt(std::to_string(n_filtering_steps))
×
515
    .set_n_enabled_proc(std::to_string(n_processing_steps_on))
×
516
    .set_n_total_proc(std::to_string(n_processing_steps))
×
517
    .write(output);
×
518
}
519

520
//! Statistics for a single filtering step
521
struct filtering_stats
522
{
523
  //! Label of the filtering step
524
  std::string label;
525
  //! Whether or not this step is enabled by command-line options
526
  bool enabled;
527
  //! Number of reads/bases trimmed/filtered
528
  reads_and_bases count;
529
};
530

531
/**
 * Returns the number of input reads recorded in `ptr` together with the
 * product of its length distribution, which is required to be non-negative.
 */
reads_and_bases
summarize_input(const fastq_stats_ptr& ptr)
{
  const auto& stats = *ptr;

  const auto n_bases = stats.length_dist().product();
  AR_REQUIRE(n_bases >= 0);

  return reads_and_bases{ stats.number_of_input_reads(),
                          static_cast<uint64_t>(n_bases) };
}
540

541
void
542
build_polyx_trimming_rows(std::vector<trimming_stats>& out,
×
543
                          const std::string& polyx_nucleotides,
544
                          const indexed_count<ACGT>& reads,
545
                          const indexed_count<ACGT>& bases,
546
                          const size_t id)
547
{
548
  for (const auto nucleotide : ACGT::values) {
×
549
    out.push_back(
×
550
      { id,
551
        "Processing",
552
        "Poly-X tails",
553
        std::string(1, nucleotide),
554
        polyx_nucleotides.find(nucleotide) != std::string::npos,
×
555
        reads_and_bases(reads.get(nucleotide), bases.get(nucleotide)) });
×
556
  }
557

558
  out.push_back({ id,
×
559
                  "Processing",
560
                  "Poly-X tails",
561
                  "*",
562
                  polyx_nucleotides.size() > 1,
×
563
                  reads_and_bases(reads.sum(), bases.sum()) });
×
564
}
565

566
void
567
write_html_processing_section(const userconfig& config,
×
568
                              const statistics& stats,
569
                              std::ostream& output)
570
{
571
  trimming_statistics totals;
×
572
  for (const auto& it : stats.trimming) {
×
573
    totals += *it;
×
574
  }
575

576
  uint64_t adapter_reads = 0;
×
577
  uint64_t adapter_bases = 0;
×
578

579
  for (size_t i = 0; i < config.samples.adapters().size(); ++i) {
×
580
    adapter_reads += totals.adapter_trimmed_reads.get(i);
×
581
    adapter_bases += totals.adapter_trimmed_bases.get(i);
×
582
  }
583

584
  const auto total_input =
×
585
    summarize_input(stats.input_1) + summarize_input(stats.input_2);
×
586

587
  reads_and_bases total_output;
×
588
  for (const auto& it : stats.trimming) {
×
589
    total_output += summarize_input(it->read_1);
×
590
    total_output += summarize_input(it->read_2);
×
591
    total_output += summarize_input(it->singleton);
×
592
    total_output += summarize_input(it->merged);
×
593
  }
594

595
  // Trimming steps prior to adapter trimming
596
  size_t step_id = 0;
×
597
  std::vector<trimming_stats> trimming = {
×
598
    { step_id++, "Input", "Raw reads", "-", true, total_input },
×
599
    { step_id++,
×
600
      "Processing",
601
      "Terminal bases",
602
      "-",
603
      config.is_terminal_base_pre_trimming_enabled(),
×
604
      totals.terminal_pre_trimmed },
605
  };
606

607
  build_polyx_trimming_rows(trimming,
×
608
                            config.pre_trim_poly_x,
×
609
                            totals.poly_x_pre_trimmed_reads,
610
                            totals.poly_x_pre_trimmed_bases,
611
                            step_id++);
612

613
  trimming.push_back({ step_id++,
×
614
                       "Processing",
615
                       "Adapters",
616
                       "-",
617
                       config.is_adapter_trimming_enabled(),
×
618
                       reads_and_bases(adapter_reads, adapter_bases) });
619

620
  trimming.push_back({ step_id++,
×
621
                       "Processing",
622
                       "Merging",
623
                       "-",
624
                       config.is_read_merging_enabled(),
×
625
                       totals.reads_merged });
626

627
  trimming.push_back({ step_id++,
×
628
                       "Processing",
629
                       "Terminal bases",
630
                       "-",
631
                       config.is_terminal_base_post_trimming_enabled(),
×
632
                       totals.terminal_post_trimmed });
633

634
  build_polyx_trimming_rows(trimming,
×
635
                            config.post_trim_poly_x,
×
636
                            totals.poly_x_post_trimmed_reads,
637
                            totals.poly_x_post_trimmed_bases,
638
                            step_id++);
639

640
  trimming.push_back({ step_id++,
×
641
                       "Processing",
642
                       "Low quality bases",
643
                       "-",
644
                       config.is_low_quality_trimming_enabled(),
×
645
                       totals.low_quality_trimmed });
646

647
  trimming.push_back({ step_id++,
×
648
                       "Filtering",
649
                       "Short reads",
650
                       "-",
651
                       config.is_short_read_filtering_enabled(),
×
652
                       totals.filtered_min_length });
653

654
  trimming.push_back({ step_id++,
×
655
                       "Filtering",
656
                       "Long reads",
657
                       "-",
658
                       config.is_long_read_filtering_enabled(),
×
659
                       totals.filtered_max_length });
660
  trimming.push_back({ step_id++,
×
661
                       "Filtering",
662
                       "Ambiguous bases",
663
                       "-",
664
                       config.is_ambiguous_base_filtering_enabled(),
×
665
                       totals.filtered_ambiguous });
666
  trimming.push_back({ step_id++,
×
667
                       "Filtering",
668
                       "Mean quality",
669
                       "-",
670
                       config.is_mean_quality_filtering_enabled(),
×
671
                       totals.filtered_mean_quality });
672
  trimming.push_back({ step_id++,
×
673
                       "Filtering",
674
                       "Low complexity reads",
675
                       "-",
676
                       config.is_low_complexity_filtering_enabled(),
×
677
                       totals.filtered_low_complexity });
678

679
  trimming.push_back(
×
680
    { step_id++, "Output", "Filtered reads", "-", true, total_output });
×
681

682
  write_html_trimming_stats(output, trimming, total_input);
×
683
}
684

685
void
686
write_html_section_title(const std::string& title, std::ostream& output)
×
687
{
688
  html_h2_tag().set_title(title).set_href(to_lower(title)).write(output);
×
689
}
690

691
void
692
write_html_io_section(const userconfig& config,
×
693
                      std::ostream& output,
694
                      const std::string& title,
695
                      fastq_stats_vec statistics,
696
                      string_vec names,
697
                      const fastq_stats_ptr& merged = fastq_stats_ptr())
698
{
699
  AR_REQUIRE(statistics.size() == names.size());
×
700

701
  write_html_section_title(title, output);
×
702

703
  const char* dynamic_width =
×
704
    config.paired_ended_mode || merged ? FACET_WIDTH_2 : FACET_WIDTH_1;
×
705

706
  html_plot_title()
×
707
    .set_href(to_lower(title) + "-position-qualities")
×
708
    .set_title("Position quality distribution")
×
709
    .write(output);
×
710
  html_facet_line_plot()
×
711
    .set_x_axis(config.is_read_merging_enabled() && merged ? "null"
×
712
                                                           : "Position"_json)
713
    .set_y_axis("Phred score"_json)
×
714
    .set_width(dynamic_width)
×
715
    .set_values(build_base_qualities(statistics, names))
×
716
    .write(output);
×
717

718
  if (config.is_read_merging_enabled() && merged) {
×
719
    html_facet_line_plot()
×
720
      .set_x_axis("Position"_json)
×
721
      .set_y_axis("Phred score"_json)
×
722
      .set_width(FIGURE_WIDTH)
×
723
      .set_values(build_base_qualities({ merged }, { "Merged" }))
×
724
      .write(output);
×
725
  }
726

727
  html_plot_title()
×
728
    .set_href(to_lower(title) + "-nucleotide-content")
×
729
    .set_title("Nucleotide content")
×
730
    .write(output);
×
731
  html_facet_line_plot()
×
732
    .set_x_axis(config.is_read_merging_enabled() && merged ? "null"
×
733
                                                           : "Position"_json)
734
    .set_y_axis("Frequency"_json)
×
735
    .set_width(dynamic_width)
×
736
    .set_values(build_base_content(statistics, names))
×
737
    .write(output);
×
738

739
  if (config.is_read_merging_enabled() && merged) {
×
740
    html_facet_line_plot()
×
741
      .set_x_axis("Position"_json)
×
742
      .set_y_axis("Frequency"_json)
×
743
      .set_width(FIGURE_WIDTH)
×
744
      .set_values(build_base_content({ merged }, { "Merged" }))
×
745
      .write(output);
×
746

747
    // Subsequent plots should include merged reads
748
    names.push_back("Merged");
×
749
    statistics.push_back(merged);
×
750
  }
751

752
  html_plot_title()
×
753
    .set_href(to_lower(title) + "-quality-scores")
×
754
    .set_title("Quality score distribution")
×
755
    .write(output);
×
756
  html_frequency_plot()
×
757
    .set_x_axis("Phred score"_json)
×
758
    .set_y_axis("Frequency"_json)
×
759
    .set_width(FIGURE_WIDTH)
×
760
    .set_values(build_quality_distribution(statistics, names))
×
761
    .write(output);
×
762

763
  {
×
764
    json_list data;
×
765

766
    for (size_t i = 0; i < statistics.size(); ++i) {
×
767
      const auto m = data.dict();
×
768
      m->str("group", names.at(i));
×
769
      m->i64("offset", 0);
×
770
      m->f64_vec("y", statistics.at(i)->gc_content());
×
771
    }
772

773
    html_plot_title()
×
774
      .set_href(to_lower(title) + "-gc-content")
×
775
      .set_title("GC Content")
×
776
      .write(output);
×
777
    html_frequency_plot()
×
778
      .set_x_axis("%GC"_json)
×
779
      .set_y_axis("Frequency"_json)
×
780
      .set_width(FIGURE_WIDTH)
×
781
      .set_values(data.to_string())
×
782
      .write(output);
×
783
  }
784
}
785

786
void
787
write_html_input_section(const userconfig& config,
×
788
                         const statistics& stats,
789
                         std::ostream& output)
790
{
791
  fastq_stats_vec stats_vec = { stats.input_1 };
×
792
  string_vec names = { "File 1" };
×
793

794
  if (config.paired_ended_mode) {
×
795
    stats_vec.push_back(stats.input_2);
×
796
    names.emplace_back("File 2");
×
797
  }
798

799
  write_html_io_section(config,
×
800
                        output,
801
                        "Input",
802
                        std::move(stats_vec),
803
                        std::move(names));
804
}
805

806
void
807
write_html_analyses_section(const userconfig& config,
×
808
                            const statistics& stats,
809
                            std::ostream& output)
810

811
{
812
  write_html_section_title("Analyses", output);
×
813

814
  // Insert size distribution
815
  if (config.paired_ended_mode) {
×
816
    counts insert_sizes;
×
817
    for (const auto& it : stats.trimming) {
×
818
      insert_sizes += it->insert_sizes;
×
819
    }
820

821
    json_list samples;
×
822
    const auto sample = samples.dict();
×
823
    sample->str("group", "insert_sizes");
×
824
    sample->i64("offset", 0);
×
825
    sample->i64_vec("y", insert_sizes);
×
826

827
    // FIXME: Specify "identified reads" when in demultiplexing mode and
828
    // correct format_percentage to merged / n_identified.
829
    std::ostringstream ss;
×
830
    ss << "Insert sizes inferred for "
×
831
       << format_percentage(insert_sizes.sum(),
×
832
                            stats.input_1->number_of_input_reads())
×
833
       << " of reads";
×
834

835
    html_plot_title()
×
836
      .set_href("analyses-insert-sizes")
×
837
      .set_title("Insert-size distribution")
×
838
      .write(output);
×
839
    html_plot_sub_title().set_sub_title(ss.str()).write(output);
×
840
    html_frequency_plot()
×
841
      .set_x_axis("Insert size"_json)
×
842
      .set_y_axis("Frequency"_json)
×
843
      .set_legend("null")
×
844
      .set_width(FIGURE_WIDTH)
×
845
      .set_values(samples.to_string())
×
846
      .write(output);
×
847

848
    if (config.run_type == ar_command::report_only) {
×
849
      html_output_note()
×
850
        .set_text(
×
851
          "Insert size distribution inferred using adapter-free alignments.")
852
        .write(output);
×
853
    }
854
  }
855

856
  if (config.report_duplication) {
×
857
    AR_REQUIRE(stats.duplication_1 && stats.duplication_2);
×
858
    const auto dupes_1 = stats.duplication_1->summarize();
×
859
    const auto dupes_2 = stats.duplication_2->summarize();
×
860
    const auto mean_uniq_frac = (dupes_1.unique_frac + dupes_2.unique_frac) / 2;
×
861

862
    const auto to_percent = [](double value) {
×
863
      std::ostringstream os;
×
864
      os << std::fixed << std::setprecision(1) << (value * 100.0) << " %";
×
865
      return os.str();
×
866
    };
867

868
    html_duplication_head().write(output);
×
869
    if (config.paired_ended_mode) {
×
870
      html_duplication_body_pe()
×
871
        .set_pct_unique(to_percent(mean_uniq_frac))
×
872
        .set_pct_unique_1(to_percent(dupes_1.unique_frac))
×
873
        .set_pct_unique_2(to_percent(dupes_2.unique_frac))
×
874
        .write(output);
×
875
    } else {
876
      html_duplication_body_se()
×
877
        .set_pct_unique(to_percent(dupes_1.unique_frac))
×
878
        .write(output);
×
879
    }
880

881
    const auto add_line = [](json_list& list,
×
882
                             std::string_view read,
883
                             std::string_view group,
884
                             const std::vector<std::string>& labels,
885
                             const rates& values) {
886
      AR_REQUIRE(labels.size() == values.size());
×
887
      for (size_t i = 0; i < labels.size(); ++i) {
×
888
        auto dict = list.dict();
×
889
        dict->str("read", read);
×
890
        dict->str("group", group);
×
891
        dict->str("x", labels.at(i));
×
892
        dict->f64("y", values.get(i));
×
893
      }
894
    };
895

896
    json_list data;
×
897
    const auto add_lines = [add_line, &data](const decltype(dupes_1)& s,
×
898
                                             std::string_view label) {
899
      add_line(data, label, "All", s.labels, s.total_sequences);
×
900
      add_line(data, label, "Unique", s.labels, s.unique_sequences);
×
901
    };
902

903
    add_lines(dupes_1, "File 1");
×
904
    if (config.paired_ended_mode) {
×
905
      add_lines(dupes_2, "File 2");
×
906
    }
907

908
    html_duplication_plot()
×
909
      .set_width(config.paired_ended_mode ? FACET_WIDTH_2 : FACET_WIDTH_1)
×
910
      .set_values(data.to_string())
×
911
      .write(output);
×
912
  }
913

914
  // Consensus adapter sequence inference
915
  if (config.paired_ended_mode && config.run_type == ar_command::report_only) {
×
916
    AR_REQUIRE(stats.adapter_id);
×
917

918
    const auto adapter_1 = stats.adapter_id->adapter1.summarize();
×
919
    const auto adapter_2 = stats.adapter_id->adapter2.summarize();
×
920

921
    // Consensus adapter sequences
922
    {
×
923
      const auto reference_adapters =
×
924
        config.samples.adapters().to_read_orientation().front();
×
925
      std::string reference_adapter_1{ reference_adapters.first };
×
926
      std::string reference_adapter_2{ reference_adapters.second };
×
927

928
      html_consensus_adapter_head()
×
929
        .set_overlapping_pairs(
×
930
          format_rough_number(stats.adapter_id->aligned_pairs))
×
931
        .set_pairs_with_adapters(
×
932
          format_rough_number(stats.adapter_id->pairs_with_adapters))
×
933
        .write(output);
×
934

935
      html_consensus_adapter_table()
×
936
        .set_name_1("--adapter1")
×
937
        .set_reference_1(reference_adapter_1)
×
938
        .set_alignment_1(adapter_1.compare_with(reference_adapter_1))
×
939
        .set_consensus_1(adapter_1.adapter().sequence())
×
940
        .set_qualities_1(adapter_1.adapter().qualities())
×
941
        .set_name_2("--adapter2")
×
942
        .set_reference_2(reference_adapter_2)
×
943
        .set_alignment_2(adapter_2.compare_with(reference_adapter_2))
×
944
        .set_consensus_2(adapter_2.adapter().sequence())
×
945
        .set_qualities_2(adapter_2.adapter().qualities())
×
946
        .write(output);
×
947
    }
948

949
    // Top N most common 5' kmers in adapter fragments
950
    {
×
951
      const auto& top_kmers_1 = adapter_1.top_kmers();
×
952
      const auto& top_kmers_2 = adapter_2.top_kmers();
×
953

954
      html_consensus_adapter_kmer_head()
×
955
        .set_n_kmers(std::to_string(consensus_adapter_stats::top_n_kmers))
×
956
        .set_kmer_length(std::to_string(consensus_adapter_stats::kmer_length))
×
957
        .write(output);
×
958

959
      const auto kmers = std::max(top_kmers_1.size(), top_kmers_2.size());
×
960
      for (size_t i = 0; i < kmers; ++i) {
×
961
        html_consensus_adapter_kmer_row row;
×
962
        row.set_index(std::to_string(i + 1));
×
963

964
        if (top_kmers_1.size() > i) {
×
965
          const auto& kmer = top_kmers_1.at(i);
×
966

967
          row.set_kmer_1(kmer.first)
×
968
            .set_count_1(format_rough_number(kmer.second))
×
969
            .set_pct_1(format_percentage(kmer.second, adapter_1.total_kmers()));
×
970
        }
971

972
        if (top_kmers_2.size() > i) {
×
973
          const auto& kmer = top_kmers_2.at(i);
×
974

975
          row.set_kmer_2(kmer.first)
×
976
            .set_count_2(format_rough_number(kmer.second))
×
977
            .set_pct_2(format_percentage(kmer.second, adapter_2.total_kmers()));
×
978
        }
979

980
        row.write(output);
×
981
      }
982

983
      html_consensus_adapter_kmer_tail().write(output);
×
984
    }
985
  }
986
}
987

988
void
989
write_html_demultiplexing_barplot(const userconfig& config,
×
990
                                  const statistics& stats,
991
                                  std::ostream& output)
992
{
993
  json_list data;
×
994

995
  const size_t input_reads = stats.input_1->number_of_input_reads() +
×
996
                             stats.input_2->number_of_input_reads();
×
997

998
  for (size_t i = 0; i < config.samples.size(); ++i) {
×
999
    const auto& sample = config.samples.at(i);
×
1000

1001
    for (size_t j = 0; j < sample.size(); ++j) {
×
1002
      auto count = stats.demultiplexing->samples.at(i).get(j);
×
1003

1004
      const auto& sequences = sample.at(j);
×
1005
      std::string key{ sequences.barcode_1 };
×
1006
      if (!sequences.barcode_2.empty()) {
×
1007
        key.push_back('-');
×
1008
        key.append(sequences.barcode_2);
×
1009
      }
1010

1011
      auto m = data.dict();
×
1012
      m->i64("n", j + 1);
×
1013
      m->str("barcodes", key);
×
1014

NEW
1015
      if (sequences.orientation != barcode_orientation::unspecified) {
×
NEW
1016
        m->str("orientation", orientation_to_label(sequences));
×
1017
      }
1018

UNCOV
1019
      m->str("sample", sample.name());
×
1020

1021
      if (input_reads) {
×
1022
        m->f64("pct", (100.0 * count) / input_reads);
×
1023
      } else {
1024
        m->null("pct");
×
1025
      }
1026
    }
1027
  }
1028

1029
  html_plot_title()
×
1030
    .set_href("demux-samples")
×
1031
    .set_title("Samples identified")
×
1032
    .write(output);
×
1033
  html_bar_plot()
×
1034
    .set_x_axis("Samples"_json)
×
1035
    .set_y_axis("Percent"_json)
×
1036
    .set_width(FIGURE_WIDTH)
×
1037
    .set_values(data.to_string())
×
1038
    .write(output);
×
1039
}
1040

1041
void
1042
write_html_demultiplexing_table(const userconfig& config,
×
1043
                                const statistics& stats,
1044
                                std::ostream& output,
1045
                                const bool multiple_barcodes,
1046
                                const bool mixed_orientation)
1047
{
1048
  const size_t input_reads = stats.input_1->number_of_input_reads() +
×
1049
                             stats.input_2->number_of_input_reads();
×
1050

NEW
1051
  html_demultiplexing_table_head()
×
NEW
1052
    .set_orientation(mixed_orientation ? "<th></th>" : "")
×
NEW
1053
    .write(output);
×
1054

1055
  {
×
1056
    const size_t unidentified = stats.demultiplexing->unidentified;
×
1057

1058
    fastq_statistics total;
×
1059
    total += *stats.demultiplexing->unidentified_stats_1;
×
1060
    total += *stats.demultiplexing->unidentified_stats_2;
×
1061

1062
    const auto output_reads = total.length_dist().sum();
×
1063
    const auto output_bp = total.nucleotides_pos().sum();
×
1064

1065
    html_demultiplexing_row()
×
1066
      .set_name("<b>Unidentified</b>")
×
1067
      .set_sample_pct(format_percentage(unidentified, input_reads, 2))
×
1068
      .set_reads(format_rough_number(output_reads))
×
1069
      .set_bp(format_rough_number(output_bp))
×
1070
      .set_length(mean_of_bp_counts(total.length_dist()))
×
1071
      .set_gc(format_percentage(total.nucleotides_gc_pos().sum(), output_bp))
×
NEW
1072
      .set_orientation(mixed_orientation ? "<td></td>" : "")
×
UNCOV
1073
      .write(output);
×
1074
  }
1075

1076
  size_t sample_idx = 0;
×
1077
  for (const auto& sample : config.samples) {
×
1078
    const auto& output_stats = *stats.trimming.at(sample_idx);
×
1079
    const auto& barcode_counts = stats.demultiplexing->samples.at(sample_idx);
×
1080
    const auto sample_reads = barcode_counts.sum();
×
1081

1082
    fastq_statistics total;
×
1083

1084
    total += *output_stats.read_1;
×
1085
    total += *output_stats.read_2;
×
1086
    total += *output_stats.merged;
×
1087
    total += *output_stats.singleton;
×
1088
    // Not included in overview:
1089
    // total += *sample.discarded;
1090

1091
    const auto output_reads = total.length_dist().sum();
×
1092
    const auto output_bp = total.nucleotides_pos().sum();
×
1093

1094
    html_demultiplexing_row row;
×
1095
    if (sample.size() < 2) {
×
NEW
1096
      const auto& it = sample.at(0);
×
NEW
1097
      row.set_barcode_1(std::string{ it.barcode_1 })
×
NEW
1098
        .set_barcode_2(std::string{ it.barcode_2 });
×
1099

NEW
1100
      if (mixed_orientation) {
×
NEW
1101
        row.set_orientation("<td>" + orientation_to_label(it) + "</td>");
×
1102
      }
1103
    } else {
NEW
1104
      const auto cell = "<i>" + std::to_string(sample.size()) + " barcodes</i>";
×
1105
      row.set_barcode_1(cell).set_barcode_2(cell);
×
1106

NEW
1107
      if (mixed_orientation) {
×
NEW
1108
        row.set_orientation("<td></td>");
×
1109
      }
1110
    }
1111

1112
    row.set_n(std::to_string(sample_idx + 1))
×
1113
      .set_name(sample.name())
×
1114
      .set_sample_pct(format_percentage(sample_reads, input_reads, 2))
×
1115
      .set_reads(format_rough_number(output_reads))
×
1116
      .set_bp(format_rough_number(output_bp))
×
1117
      .set_length(mean_of_bp_counts(total.length_dist()))
×
1118
      .set_gc(format_percentage(total.nucleotides_gc_pos().sum(), output_bp))
×
1119
      .write(output);
×
1120

1121
    if (sample.size() > 1) {
×
1122
      const auto total = barcode_counts.sum();
×
1123

1124
      for (size_t j = 0; j < sample.size(); j++) {
×
1125
        const auto& it = sample.at(j);
×
1126
        const auto count = barcode_counts.get(j);
×
1127

NEW
1128
        html_demultiplexing_barcode_row row;
×
NEW
1129
        row.set_barcode_1(std::string{ it.barcode_1 })
×
1130
          .set_barcode_2(std::string{ it.barcode_2 })
×
NEW
1131
          .set_barcode_pct_row(format_percentage(count, total, 2));
×
1132

NEW
1133
        if (mixed_orientation) {
×
NEW
1134
          row.set_orientation("<td>" + orientation_to_label(it) + "</td>");
×
1135
        }
1136

NEW
1137
        row.write(output);
×
1138
      }
1139
    }
1140

1141
    ++sample_idx;
×
1142
  }
1143

1144
  html_demultiplexing_table_tail().write(output);
×
1145

NEW
1146
  if (multiple_barcodes || mixed_orientation) {
×
1147
    html_demultiplexing_toggle().write(output);
×
1148
  }
1149
}
1150

1151
void
1152
write_html_demultiplexing_section(const userconfig& config,
×
1153
                                  const statistics& stats,
1154
                                  std::ostream& output)
1155

1156
{
1157
  bool multiple_barcodes = false;
×
NEW
1158
  bool mixed_orientation = false;
×
1159
  for (const auto& sample : config.samples) {
×
NEW
1160
    multiple_barcodes |= sample.size() > 1;
×
NEW
1161
    for (const auto& it : sample) {
×
NEW
1162
      mixed_orientation |= it.orientation != barcode_orientation::unspecified;
×
1163
    }
1164
  }
1165

1166
  write_html_section_title("Demultiplexing", output);
×
1167
  html_demultiplexing_head().write(output);
×
1168
  write_html_demultiplexing_barplot(config, stats, output);
×
NEW
1169
  write_html_demultiplexing_table(config,
×
1170
                                  stats,
1171
                                  output,
1172
                                  multiple_barcodes,
1173
                                  mixed_orientation);
1174
}
1175

1176
void
1177
write_html_output_section(const userconfig& config,
×
1178
                          const statistics& stats,
1179
                          std::ostream& output)
1180

1181
{
1182
  fastq_stats_vec stats_vec;
×
1183
  string_vec names;
×
1184

1185
  auto merged = std::make_shared<fastq_statistics>();
×
1186

1187
  {
×
1188
    auto output_1 = std::make_shared<fastq_statistics>();
×
1189
    auto output_2 = std::make_shared<fastq_statistics>();
×
1190
    auto singleton = std::make_shared<fastq_statistics>();
×
1191
    auto discarded = std::make_shared<fastq_statistics>();
×
1192

1193
    for (const auto& it : stats.trimming) {
×
1194
      *output_1 += *it->read_1;
×
1195
      *output_2 += *it->read_2;
×
1196
      *merged += *it->merged;
×
1197
      *singleton += *it->singleton;
×
1198
      *discarded += *it->discarded;
×
1199
    }
1200

1201
    stats_vec.push_back(output_1);
×
1202
    names.emplace_back("Output 1");
×
1203

1204
    if (config.paired_ended_mode) {
×
1205
      stats_vec.push_back(output_2);
×
1206
      names.emplace_back("Output 2");
×
1207

1208
      if (config.is_any_filtering_enabled()) {
×
1209
        stats_vec.push_back(singleton);
×
1210
        names.emplace_back("Singleton");
×
1211
      }
1212
    }
1213

1214
    if (config.is_any_filtering_enabled()) {
×
1215
      stats_vec.push_back(discarded);
×
1216
      names.emplace_back("Discarded");
×
1217
    }
1218
  }
1219

1220
  write_html_io_section(config,
×
1221
                        output,
1222
                        "Output",
1223
                        std::move(stats_vec),
1224
                        std::move(names),
1225
                        merged);
1226
}
1227

1228
} // namespace
1229

1230
////////////////////////////////////////////////////////////////////////////////
1231

1232
bool
1233
write_html_report(const userconfig& config,
×
1234
                  const statistics& stats,
1235
                  const std::string& filename)
1236
{
1237
  if (filename == DEV_NULL) {
×
1238
    // User disabled the report
1239
    return true;
1240
  }
1241

1242
  std::ostringstream output;
×
1243

1244
  write_html_summary_section(config, stats, output);
×
1245

1246
  if (config.run_type != ar_command::demultiplex_only &&
×
1247
      config.run_type != ar_command::report_only) {
1248
    write_html_processing_section(config, stats, output);
×
1249
  }
1250

1251
  write_html_input_section(config, stats, output);
×
1252

1253
  if (config.paired_ended_mode || config.report_duplication ||
×
1254
      config.run_type == ar_command::report_only) {
×
1255
    write_html_analyses_section(config, stats, output);
×
1256
  }
1257

1258
  if (config.is_demultiplexing_enabled()) {
×
1259
    write_html_demultiplexing_section(config, stats, output);
×
1260
  }
1261

1262
  if (config.run_type != ar_command::report_only) {
×
1263
    write_html_output_section(config, stats, output);
×
1264
  }
1265

1266
  html_body_end().write(output);
×
1267

1268
  try {
×
1269
    managed_writer writer{ filename };
×
1270
    writer.write(output.str());
×
1271
    writer.close();
×
1272
  } catch (const std::ios_base::failure& error) {
×
1273
    log::error() << "Error writing JSON report to '" << filename << "':\n"
×
1274
                 << indent_lines(error.what());
×
1275
    return false;
×
1276
  }
×
1277

1278
  return true;
×
1279
}
1280

1281
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc