• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #36

22 Jul 2024 09:33AM UTC coverage: 87.26% (-12.7%) from 100.0%
#36

push

travis-ci

MikkelSchubert
remove duplicate tests

2185 of 2504 relevant lines covered (87.26%)

16293.15 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

73.38
/src/linereader.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2015 by Mikkel Schubert - mikkelsch@gmail.com           *
5
 *                                                                       *
6
 * This program is free software: you can redistribute it and/or modify  *
7
 * it under the terms of the GNU General Public License as published by  *
8
 * the Free Software Foundation, either version 3 of the License, or     *
9
 * (at your option) any later version.                                   *
10
 *                                                                       *
11
 * This program is distributed in the hope that it will be useful,       *
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14
 * GNU General Public License for more details.                          *
15
 *                                                                       *
16
 * You should have received a copy of the GNU General Public License     *
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
18
\*************************************************************************/
19
#include "linereader.hpp"    // declarations
20
#include "debug.hpp"         // for AR_FAIL
21
#include "errors.hpp"        // for io_error
22
#include "logging.hpp"       // for warn, log_stream
23
#include "managed_io.hpp"    // for managed_writer
24
#include "strutils.hpp"      // for shell_escape
25
#include <cerrno>            // for errno
26
#include <cstdint>           // for uint8_t
27
#include <cstring>           // for strerror, memchr
28
#include <isa-l/igzip_lib.h> // for inflate_state, isal_gzip_header, isal_...
29
#include <memory>            // for unique_ptr, shared_ptr, __shared_ptr_a...
30
#include <sstream>           // for operator<<, basic_ostream
31

32
namespace adapterremoval {
33

34
///////////////////////////////////////////////////////////////////////////////
35
// Implementations for 'gzip_error'
36

37
[[noreturn]] void
38
throw_gzip_error(const std::string& filename,
×
39
                 const char* action,
40
                 const char* error,
41
                 const char* diagnosis = "file is likely corrupt")
42
{
43
  std::ostringstream stream;
×
44
  stream << "Error while " << action << " " << shell_escape(filename) << ": "
×
45
         << error << "; " << diagnosis;
×
46

47
  throw gzip_error(stream.str());
×
48
}
×
49

50
///////////////////////////////////////////////////////////////////////////////
51
// Helper functions for isa-l
52

53
void
54
check_isal_return_code(int returncode,
30✔
55
                       const std::string& file,
56
                       const char* action)
57
{
58
  switch (returncode) {
30✔
59
    case ISAL_DECOMP_OK:
30✔
60
      return;
30✔
61

62
    case ISAL_END_INPUT:
×
63
      throw_gzip_error(file, action, "end of input reached");
×
64

65
    case ISAL_OUT_OVERFLOW:
×
66
      throw_gzip_error(file, action, "end of output reached");
×
67

68
    case ISAL_NAME_OVERFLOW:
×
69
      throw_gzip_error(file, action, "end of gzip name buffer reached");
×
70

71
    case ISAL_COMMENT_OVERFLOW:
×
72
      throw_gzip_error(file, action, "end of gzip comment buffer reached");
×
73

74
    case ISAL_EXTRA_OVERFLOW:
×
75
      throw_gzip_error(file, action, "end of extra buffer reached");
×
76

77
    case ISAL_NEED_DICT:
×
78
      throw_gzip_error(file, action, "stream needs dictionary to continue");
×
79

80
    case ISAL_INVALID_BLOCK:
×
81
      throw_gzip_error(file, action, "invalid deflate block found");
×
82

83
    case ISAL_INVALID_SYMBOL:
×
84
      throw_gzip_error(file, action, "invalid deflate symbol found");
×
85

86
    case ISAL_INVALID_LOOKBACK:
×
87
      throw_gzip_error(file, action, "invalid lookback distance found");
×
88

89
    case ISAL_INVALID_WRAPPER:
×
90
      throw_gzip_error(file, action, "invalid gzip/zlib wrapper found");
×
91

92
    case ISAL_UNSUPPORTED_METHOD:
×
93
      throw_gzip_error(file, action, "unsupported compression method");
×
94

95
    case ISAL_INCORRECT_CHECKSUM:
×
96
      throw_gzip_error(file, action, "incorrect checksum found");
×
97

98
    default:
×
99
      throw_gzip_error(file, action, "unknown error");
×
100
  }
101
}
102

103
///////////////////////////////////////////////////////////////////////////////
104
// Implementations for 'vec_reader'
105

106
// Creates a reader that serves the given lines one at a time via getline().
// m_lines is initialized from `lines` and m_it is positioned at the first
// element of m_lines.
// NOTE(review): m_it is initialized from m_lines, so m_lines presumably is
// declared before m_it in the class; confirm against linereader.hpp.
vec_reader::vec_reader(const string_vec& lines)
19✔
107
  : m_lines(lines)
19✔
108
  , m_it(m_lines.begin())
57✔
109
{
110
}
19✔
111

112
bool
113
vec_reader::getline(std::string& dst)
90✔
114
{
115
  if (m_it == m_lines.end()) {
270✔
116
    return false;
117
  }
118

119
  dst = *m_it++;
240✔
120
  return true;
80✔
121
}
122

123
///////////////////////////////////////////////////////////////////////////////
124
// Implementations for 'line_reader'
125

126
// Creates a line reader on top of an already opened file handle.
//
// The gzip state and the line buffer start out unset, a raw buffer is
// allocated with its end marker placed at the beginning (i.e. no data), and
// refill_buffers() is then called once to read the first chunk of data.
// NOTE(review): m_raw_buffer_end is initialized from m_raw_buffer, so
// m_raw_buffer presumably is declared first in the class; confirm against
// linereader.hpp.
line_reader::line_reader(FILE* handle)
14✔
127
  : m_reader(handle)
14✔
128
  , m_gzip_stream(nullptr)
13✔
129
  , m_gzip_header(nullptr)
13✔
130
  , m_buffer(nullptr)
13✔
131
  , m_buffer_ptr(nullptr)
13✔
132
  , m_buffer_end(nullptr)
13✔
133
  , m_raw_buffer(std::make_shared<line_buffer>())
13✔
134
  , m_raw_buffer_end(m_raw_buffer->begin())
39✔
135
  , m_eof(false)
41✔
136
{
137
  // m_buffer is still unset here, so this takes the first-call path in
  // refill_buffers(), which checks whether the raw data looks like gzip
  refill_buffers();
13✔
138
}
14✔
139

140
// Creates a line reader for the named file; the filename is moved into
// m_reader.
//
// The gzip state and the line buffer start out unset, a raw buffer is
// allocated with its end marker placed at the beginning (i.e. no data), and
// refill_buffers() is then called once to read the first chunk of data.
// NOTE(review): m_raw_buffer_end is initialized from m_raw_buffer, so
// m_raw_buffer presumably is declared first in the class; confirm against
// linereader.hpp.
line_reader::line_reader(std::string filename)
1✔
141
  : m_reader(std::move(filename))
2✔
142
  , m_gzip_stream(nullptr)
×
143
  , m_gzip_header(nullptr)
×
144
  , m_buffer(nullptr)
×
145
  , m_buffer_ptr(nullptr)
×
146
  , m_buffer_end(nullptr)
×
147
  , m_raw_buffer(std::make_shared<line_buffer>())
×
148
  , m_raw_buffer_end(m_raw_buffer->begin())
×
149
  , m_eof(false)
2✔
150
{
151
  // m_buffer is still unset here, so this takes the first-call path in
  // refill_buffers(), which checks whether the raw data looks like gzip
  refill_buffers();
×
152
}
1✔
153

154
bool
155
line_reader::getline(std::string& dst)
51✔
156
{
157
  dst.clear();
51✔
158

159
  while (!m_eof) {
76✔
160
    const size_t length = m_buffer_end - m_buffer_ptr;
62✔
161
    auto* ptr = static_cast<char*>(memchr(m_buffer_ptr, '\n', length));
62✔
162
    if (ptr) {
62✔
163
      // Excluding terminal \n
164
      dst.append(m_buffer_ptr, ptr - m_buffer_ptr);
37✔
165
      if (!dst.empty() && dst.back() == '\r') {
107✔
166
        // Excluding terminal \r; this may have been added in the loop prior to
167
        // \n being found, if \n is the first character in the buffer. It is
168
        // therefore easiest to check dst directly.
169
        dst.pop_back();
10✔
170
      }
171

172
      m_buffer_ptr = ptr + 1;
37✔
173
      return true;
37✔
174
    }
175

176
    // Can potentially introduce a \r; this is handled above.
177
    dst.append(m_buffer_ptr, length);
25✔
178
    refill_buffers();
25✔
179
  }
180

181
  return !dst.empty();
28✔
182
}
183

184
void
185
line_reader::refill_buffers()
38✔
186
{
187
  if (m_buffer) {
76✔
188
    if (m_gzip_stream) {
50✔
189
      refill_buffers_gzip();
18✔
190
    } else {
191
      refill_raw_buffer();
7✔
192
      refill_buffers_uncompressed();
7✔
193
    }
194
  } else {
195
    refill_raw_buffer();
13✔
196

197
    if (is_raw_buffer_gzip()) {
13✔
198
      initialize_buffers_gzip();
5✔
199
    } else {
200
      refill_buffers_uncompressed();
8✔
201
    }
202
  }
203
}
38✔
204

205
void
206
line_reader::refill_buffers_uncompressed()
15✔
207
{
208
  m_buffer = m_raw_buffer;
15✔
209
  m_buffer_ptr = m_raw_buffer->data();
45✔
210
  m_buffer_end = m_raw_buffer_end;
15✔
211
}
15✔
212

213
void
214
line_reader::refill_raw_buffer(size_t avail_in)
33✔
215
{
216
  if (avail_in) {
33✔
217
    // Move unused (compressed) data to the front of the buffer
218
    std::memmove(m_raw_buffer->data(), m_raw_buffer_end - avail_in, avail_in);
27✔
219
  }
220

221
  const size_t nread = m_reader.read(m_raw_buffer->data() + avail_in,
132✔
222
                                     m_raw_buffer->size() - avail_in);
66✔
223

224
  // EOF set only once all data has been consumed
225
  m_eof = (nread + avail_in == 0);
33✔
226
  m_raw_buffer_end = m_raw_buffer->data() + nread + avail_in;
99✔
227
}
33✔
228

229
bool
230
line_reader::is_raw_buffer_gzip() const
26✔
231
{
232
  return m_raw_buffer_end - m_raw_buffer->data() > 1 &&
52✔
233
         m_raw_buffer->at(0) == '\x1f' && m_raw_buffer->at(1) == '\x8b';
92✔
234
}
235

236
void
237
line_reader::initialize_buffers_gzip()
5✔
238
{
239
  m_buffer = std::make_shared<line_buffer>();
15✔
240
  m_buffer_ptr = m_buffer->end();
15✔
241
  m_buffer_end = m_buffer->end();
15✔
242

243
  m_gzip_stream = std::make_unique<inflate_state>();
20✔
244
  m_gzip_header = std::make_unique<isal_gzip_header>();
20✔
245

246
  isal_inflate_init(m_gzip_stream.get());
10✔
247
  m_gzip_stream->crc_flag = ISAL_GZIP_NO_HDR_VER;
10✔
248
  m_gzip_stream->avail_in = m_raw_buffer_end - m_raw_buffer->data();
20✔
249
  m_gzip_stream->next_in = reinterpret_cast<uint8_t*>(m_raw_buffer->data());
20✔
250

251
  isal_gzip_header_init(m_gzip_header.get());
10✔
252
  auto result = isal_read_gzip_header(m_gzip_stream.get(), m_gzip_header.get());
15✔
253
  check_isal_return_code(
5✔
254
    result, m_reader.filename(), "reading first gzip header from");
5✔
255
}
5✔
256

257
void
258
line_reader::refill_buffers_gzip()
18✔
259
{
260
  m_gzip_stream->avail_out = m_buffer->size();
54✔
261
  m_gzip_stream->next_out = reinterpret_cast<uint8_t*>(m_buffer->data());
72✔
262

263
  // Refill the buffer if empty or if a block was finished. This ensures that we
264
  // can properly identify additional gzip blocks and parse their headers.
265
  if (!m_gzip_stream->avail_in ||
50✔
266
      m_gzip_stream->block_state == isal_block_state::ISAL_BLOCK_FINISH) {
28✔
267
    refill_raw_buffer(m_gzip_stream->avail_in);
26✔
268
    m_gzip_stream->avail_in = m_raw_buffer_end - m_raw_buffer->data();
52✔
269
    m_gzip_stream->next_in = reinterpret_cast<uint8_t*>(m_raw_buffer->data());
52✔
270

271
    if (m_gzip_stream->block_state == isal_block_state::ISAL_BLOCK_FINISH) {
26✔
272
      if (is_raw_buffer_gzip()) {
13✔
273
        isal_inflate_reset(m_gzip_stream.get());
16✔
274

275
        const auto result =
8✔
276
          isal_read_gzip_header(m_gzip_stream.get(), m_gzip_header.get());
24✔
277
        check_isal_return_code(
8✔
278
          result, m_reader.filename(), "reading next gzip header from");
8✔
279
      } else if (m_gzip_stream->avail_in) {
10✔
280
        log::warn() << "Ignoring trailing garbage at the end of "
3✔
281
                    << shell_escape(m_reader.filename());
4✔
282

283
        m_buffer_ptr = m_buffer->data();
3✔
284
        m_buffer_end = m_buffer_ptr;
1✔
285
        m_eof = true;
1✔
286
        return;
1✔
287
      }
288
    }
289
  }
290

291
  check_isal_return_code(
17✔
292
    isal_inflate(m_gzip_stream.get()), m_reader.filename(), "decompressing");
34✔
293

294
  m_buffer_ptr = m_buffer->data();
51✔
295
  m_buffer_end = m_buffer_ptr + (m_buffer->size() - m_gzip_stream->avail_out);
51✔
296

297
  if (m_eof && !m_gzip_stream->avail_in &&
25✔
298
      m_gzip_stream->block_state != isal_block_state::ISAL_BLOCK_FINISH) {
8✔
299
    throw_gzip_error(m_reader.filename(),
×
300
                     "decompressing",
301
                     "unexpected end of file",
302
                     "file is likely truncated!");
303
  }
304
}
305

306
} // namespace adapterremoval
28✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc