• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #45

20 Sep 2024 06:49PM UTC coverage: 26.244% (-49.2%) from 75.443%
#45

push

travis-ci

web-flow
attempt to fix coveralls run

2458 of 9366 relevant lines covered (26.24%)

4362.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/main_adapter_id.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2015 by Mikkel Schubert - mikkelsch@gmail.com           *
5
 *                                                                       *
6
 * This program is free software: you can redistribute it and/or modify  *
7
 * it under the terms of the GNU General Public License as published by  *
8
 * the Free Software Foundation, either version 3 of the License, or     *
9
 * (at your option) any later version.                                   *
10
 *                                                                       *
11
 * This program is distributed in the hope that it will be useful,       *
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14
 * GNU General Public License for more details.                          *
15
 *                                                                       *
16
 * You should have received a copy of the GNU General Public License     *
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
18
\*************************************************************************/
19
#include "adapter_id.hpp"    // for adapter_id_statistics
20
#include "alignment.hpp"     // for extract_adapter_sequences, sequence_aligner
21
#include "debug.hpp"         // for AR_REQUIRE
22
#include "fastq.hpp"         // for ACGTN, fastq, ACGT, ACGT:...
23
#include "fastq_io.hpp"      // for read_fastq, read_chunk
24
#include "output.hpp"        // for output_files
25
#include "reports.hpp"       // for write_html_report, write_json_report
26
#include "scheduler.hpp"     // for threadstate, scheduler, analytical_step
27
#include "sequence_sets.hpp" // for adapter_set
28
#include "userconfig.hpp"    // for userconfig
29
#include <cstddef>           // for size_t
30
#include <string>            // for string, operator<<, char_traits
31

32
namespace adapterremoval {
33

34
class reads_sink : public analytical_step
35
{
36
public:
37
  reads_sink()
×
38
    : analytical_step(processing_order::unordered, "reads_sink")
×
39
  {
40
  }
41

42
  chunk_vec process(chunk_ptr /* chunk */) override { return {}; }
×
43
};
44

45
class adapter_identification : public analytical_step
46
{
47
public:
48
  explicit adapter_identification(const userconfig& config, statistics& stats)
×
49
    : analytical_step(processing_order::unordered, "adapter_identification")
×
50
    , m_config(config)
×
51
    , m_sink_id(stats.adapter_id)
×
52
  {
53
    AR_REQUIRE(!stats.trimming.empty());
×
54
    AR_REQUIRE(stats.adapter_id);
×
55
    m_sink_trim = stats.trimming.back();
×
56

57
    // FIXME: Shouldn't be adapter specific
58
    m_stats_id.emplace_back_n(m_config.max_threads,
×
59
                              m_sink_id->adapter1.max_length());
×
60
    m_stats_ins.emplace_back_n(m_config.max_threads);
×
61
  }
62

63
  ~adapter_identification() override = default;
×
64

×
65
  chunk_vec process(chunk_ptr chunk) override
×
66
  {
67
    AR_REQUIRE(chunk);
×
68

69
    const adapter_set adapters = { { "", "" } };
×
70

71
    auto aligner = sequence_aligner(adapters, m_config.simd);
×
72

73
    auto stats_id = m_stats_id.acquire();
×
74
    auto stats_ins = m_stats_ins.acquire();
75

×
76
    AR_REQUIRE(chunk->reads_1.size() == chunk->reads_2.size());
×
77
    auto it_1 = chunk->reads_1.begin();
78
    auto it_2 = chunk->reads_2.begin();
×
79

×
80
    for (; it_1 != chunk->reads_1.end(); ++it_1, ++it_2) {
×
81
      auto& read_1 = *it_1;
82
      auto& read_2 = *it_2;
×
83

×
84
      // Reverse complement to match the orientation of read1
×
85
      read_2.reverse_complement();
86

87
      const auto alignment =
×
88
        aligner.align_paired_end(read_1, read_2, m_config.shift);
89

×
90
      if (m_config.is_good_alignment(alignment)) {
×
91
        stats_id->aligned_pairs++;
92
        if (m_config.can_merge_alignment(alignment)) {
×
93
          const size_t insert_size = alignment.insert_size(read_1, read_2);
×
94
          stats_ins->insert_sizes.resize_up_to(insert_size + 1);
×
95
          stats_ins->insert_sizes.inc(insert_size);
×
96

×
97
          if (extract_adapter_sequences(alignment, read_1, read_2)) {
×
98
            stats_id->pairs_with_adapters++;
99

×
100
            stats_id->adapter1.process(read_1.sequence());
×
101
            read_2.reverse_complement();
102
            stats_id->adapter2.process(read_2.sequence());
×
103
          }
×
104
        }
×
105
      }
106
    }
107

108
    m_stats_id.release(stats_id);
109
    m_stats_ins.release(stats_ins);
110

×
111
    return {};
×
112
  }
113

×
114
  void finalize() override
115
  {
116
    m_stats_id.merge_into(*m_sink_id);
×
117
    m_stats_ins.merge_into(*m_sink_trim);
118
  }
×
119

×
120
  adapter_identification(const adapter_identification&) = delete;
121
  adapter_identification(adapter_identification&&) = delete;
122
  adapter_identification& operator=(const adapter_identification&) = delete;
123
  adapter_identification& operator=(adapter_identification&&) = delete;
124

125
private:
126
  const userconfig& m_config;
127

128
  threadstate<adapter_id_statistics> m_stats_id{};
129
  threadstate<trimming_statistics> m_stats_ins{};
130

131
  trim_stats_ptr m_sink_trim{};
132
  adapter_id_stats_ptr m_sink_id{};
133
};
134

135
int
136
identify_adapter_sequences(const userconfig& config)
137
{
138
  scheduler sch;
×
139

140
  // FIXME: Length picked based on known sequences
×
141
  const auto max_adapter_length = config.paired_ended_mode ? 42 : 0;
142
  statistics stats = statistics_builder()
143
                       .sample_rate(config.report_sample_rate)
×
144
                       .estimate_duplication(config.report_duplication)
×
145
                       .adapter_identification(max_adapter_length)
×
146
                       .initialize();
×
147
  // FIXME: Required for insert size statistics
×
148
  stats.trimming.push_back(std::make_shared<trimming_statistics>());
×
149

150
  // Step 3:
×
151
  size_t final_step;
152
  if (config.paired_ended_mode) {
153
    // Attempt to identify adapters through pair-wise alignments
×
154
    final_step = sch.add<adapter_identification>(config, stats);
×
155
  } else {
156
    // Discard all written reads
×
157
    final_step = sch.add<reads_sink>();
158
  }
159

×
160
  // Step 2: Post-processing, validate, and collect statistics on FASTQ reads
161
  const size_t postproc_step =
162
    sch.add<post_process_fastq>(config, final_step, stats);
163

×
164
  // Step 1: Read input file(s)
×
165
  read_fastq::add_steps(sch, config, postproc_step);
166

167
  if (!sch.run(config.max_threads)) {
×
168
    return 1;
169
  }
×
170

171
  const auto out_files = config.get_output_filenames();
172
  if (!write_json_report(config, stats, out_files.settings_json)) {
173
    return 1;
×
174
  }
×
175

176
  return !write_html_report(config, stats, out_files.settings_html);
177
}
178

×
179
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc