• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #47

12 Jan 2025 09:43AM UTC coverage: 27.139% (-0.1%) from 27.245%
#47

push

travis-ci

MikkelSchubert
silence warning on musl build

2595 of 9562 relevant lines covered (27.14%)

4274.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.8
/src/strutils.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2015 by Mikkel Schubert - mikkelsch@gmail.com           *
5
 *                                                                       *
6
 * This program is free software: you can redistribute it and/or modify  *
7
 * it under the terms of the GNU General Public License as published by  *
8
 * the Free Software Foundation, either version 3 of the License, or     *
9
 * (at your option) any later version.                                   *
10
 *                                                                       *
11
 * This program is distributed in the hope that it will be useful,       *
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14
 * GNU General Public License for more details.                          *
15
 *                                                                       *
16
 * You should have received a copy of the GNU General Public License     *
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
18
\*************************************************************************/
19
#include "strutils.hpp" // declarations
20
#include "debug.hpp"    // for AR_REQUIRE
21
#include <algorithm>    // for min, reverse, max
22
#include <cctype>       // for isprint, isalnum, tolower, toupper
23
#include <charconv>     // for from_chars
24
#include <chrono>       // for system_clock
25
#include <cmath>        // for log10, pow, round
26
#include <cstdint>      // for uint64_t, int64_t
27
#include <iomanip>      // for operator<<, setprecision
28
#include <sstream>      // for ostringstream, operator<<, basic_ostream, bas...
29
#include <stdexcept>    // for invalid_argument
30
#include <unistd.h>     // for STDOUT_FILENO
31
#include <vector>       // for vector, swap
32

33
namespace adapterremoval {
34

35
namespace {
36

37
/**
 * Returns `lines` with `n_indent` spaces prepended to every non-empty line.
 * The first line is only indented if `indent_first` is true; empty lines are
 * always left untouched.
 */
string_vec
indent(string_vec lines, size_t n_indent, bool indent_first)
{
  const std::string padding(n_indent, ' ');

  bool is_first = true;
  for (auto& line : lines) {
    // Skip blank lines, and the leading line unless explicitly requested
    const bool skip = line.empty() || (is_first && !indent_first);
    if (!skip) {
      line.insert(0, padding);
    }

    is_first = false;
  }

  return lines;
}
91✔
49

50
} // namespace
51

52
/**
 * Computes the Levenshtein (edit) distance between `s` and `t`, i.e. the
 * minimum number of single-character insertions, deletions, and substitutions
 * needed to turn one string into the other. Only two rows of the dynamic
 * programming table are kept in memory at any time.
 */
size_t
levenshtein(const std::string_view s, const std::string_view t)
{
  const size_t columns = t.size() + 1;
  std::vector<size_t> previous(columns, 0);
  std::vector<size_t> current(columns, 0);

  // Distance from the empty prefix of `s` to each prefix of `t`
  size_t cost = 0;
  for (auto& cell : previous) {
    cell = cost++;
  }

  for (size_t row = 1; row <= s.size(); ++row) {
    // Distance from the first `row` characters of `s` to the empty string
    current[0] = row;

    const char s_char = s[row - 1];
    for (size_t col = 1; col < columns; ++col) {
      const size_t mismatch = (s_char == t[col - 1]) ? 0 : 1;
      const size_t substitution = previous[col - 1] + mismatch;
      const size_t deletion = previous[col] + 1;
      const size_t insertion = current[col - 1] + 1;

      current[col] = std::min({ deletion, insertion, substitution });
    }

    previous.swap(current);
  }

  return previous.back();
}
34✔
78

79
std::string
80
timestamp(const char* format, const bool milliseconds)
3✔
81
{
82
  AR_REQUIRE(format);
3✔
83
  using namespace std::chrono;
3✔
84

85
  const auto now = system_clock::now();
3✔
86
  const auto in_time_t = system_clock::to_time_t(now);
3✔
87

88
  tm in_localtime{};
3✔
89
  std::ostringstream ss;
3✔
90
  ss << std::put_time(localtime_r(&in_time_t, &in_localtime), format);
3✔
91

92
  if (milliseconds) {
3✔
93
    const auto ms =
3✔
94
      duration_cast<std::chrono::milliseconds>(now.time_since_epoch());
6✔
95
    ss << '.' << std::setfill('0') << std::setw(3) << (ms.count() % 1000);
12✔
96
  }
97

98
  return ss.str();
6✔
99
}
3✔
100

101
namespace {
102

103
template<typename T>
104
inline T
105
str_to(std::string_view s)
46✔
106
{
107
  T value{};
46✔
108

109
  s = trim_ascii_whitespace(s);
46✔
110
  const auto* begin = s.data();
46✔
111
  const auto* end = begin + s.size();
46✔
112
  const auto result = std::from_chars(begin, end, value);
46✔
113

114
  if (result.ec != std::errc{}) {
46✔
115
    throw std::invalid_argument("value is not a valid number");
14✔
116
  } else if (result.ptr != end) {
32✔
117
    throw std::invalid_argument("number contains trailing text");
4✔
118
  }
119

120
  return value;
28✔
121
}
122

123
} // namespace
124

125
uint32_t
126
str_to_u32(std::string_view s)
46✔
127
{
128
  return str_to<uint32_t>(s);
46✔
129
}
130

131
/**
 * Parses `s` as a double using stream extraction.
 *
 * Throws `std::invalid_argument` if no number could be read, or if
 * non-whitespace text follows the number.
 */
double
str_to_double(const std::string& s)
{
  // FIXME: This should use `str_to`, but that is not supported by older Clang
  std::istringstream input(s);

  double parsed = 0;
  input >> parsed;
  if (input.fail()) {
    throw std::invalid_argument("value is not a valid number");
  }

  // Formatted extraction skips whitespace, so this only succeeds if there is
  // at least one non-whitespace character left after the number
  char leftover = 0;
  input >> leftover;
  if (!input.fail()) {
    throw std::invalid_argument("number contains trailing text");
  }

  return parsed;
}
10✔
148

149
std::string
150
to_lower(std::string str)
14✔
151
{
152
  for (auto& current : str) {
200✔
153
    current = to_lower(current);
56✔
154
  }
155

156
  return str;
14✔
157
}
158

159
std::string
160
to_upper(std::string str)
2✔
161
{
162
  for (auto& current : str) {
44✔
163
    current = to_upper(current);
16✔
164
  }
165

166
  return str;
2✔
167
}
168

169
/** Returns true if `str1` begins with `str2`; an empty `str2` always matches. */
bool
starts_with(const std::string_view str1, const std::string_view str2)
{
  const auto length = str2.size();

  // The length check must come first; `compare` would otherwise clamp
  return str1.size() >= length && str1.compare(0, length, str2) == 0;
}
178

179
/** Returns true if `str1` ends with `str2`; an empty `str2` always matches. */
bool
ends_with(const std::string_view str1, const std::string_view str2)
{
  const auto length = str2.size();

  // The length check guards the subtraction below against wrapping around
  return str1.size() >= length &&
         str1.compare(str1.size() - length, length, str2) == 0;
}
188

189
/**
 * Splits `text` at every occurrence of `separator`. Empty fields are kept, so
 * the result always contains one more item than there are separators; an empty
 * `text` yields a single empty string.
 */
string_vec
split_text(const std::string_view text, const char separator)
{
  string_vec fields;

  std::string_view remainder = text;
  for (;;) {
    const auto pos = remainder.find(separator);

    // With `pos == npos` this takes everything that is left
    fields.push_back(std::string{ remainder.substr(0, pos) });

    if (pos == std::string_view::npos) {
      break;
    }

    remainder.remove_prefix(pos + 1);
  }

  return fields;
}
×
206

207
std::string
208
indent_lines(const std::string_view lines, size_t indentation)
11✔
209
{
210
  return join_text(indent(split_lines(lines), indentation, true), "\n");
44✔
211
}
212

213
/**
 * Greedily wraps the whitespace-separated words in `value` into lines.
 *
 * A word is moved to a new line if appending it (plus a separating space)
 * would make the current line exceed `max_width`. Every line after the first
 * is prefixed with `ljust` spaces, and that padding counts towards the width
 * of those lines. Words longer than `max_width` are never split. Input without
 * any words results in an empty vector.
 */
string_vec
wrap_text(const std::string& value, size_t max_width, size_t ljust)
{
  string_vec wrapped;
  std::istringstream words(value);

  // Width of the words/spaces on the current line, excluding any padding
  size_t line_width = 0;
  // Width of the padding on the current line; zero for the very first line
  size_t line_padding = 0;

  for (std::string word; words >> word;) {
    if (line_width == 0) {
      // First word in the input; always starts the first line
      wrapped.push_back(word);
      line_width += word.length();
      continue;
    }

    const size_t required = line_padding + line_width + 1 + word.length();
    if (required <= max_width) {
      auto& line = wrapped.back();
      line.push_back(' ');
      line.append(word);
      line_width += word.length() + 1;
    } else {
      // Start a new, left-padded line containing just this word
      line_padding = ljust;
      wrapped.emplace_back(line_padding, ' ');
      wrapped.back().append(word);
      line_width = word.length();
    }
  }

  return wrapped;
}
115✔
242

243
/**
 * Returns the part of `s` that remains after removing leading and trailing
 * ASCII whitespace (space, \f, \n, \r, \t, and \v). A default-constructed view
 * is returned if `s` is empty or consists solely of whitespace.
 */
std::string_view
trim_ascii_whitespace(std::string_view s)
{
  constexpr std::string_view blanks = " \f\n\r\t\v";

  const auto first = s.find_first_not_of(blanks);
  if (first == std::string_view::npos) {
    return std::string_view{};
  }

  // A non-whitespace character exists, so this search cannot return npos
  const auto last = s.find_last_not_of(blanks);

  return s.substr(first, last - first + 1);
}
259

260
///////////////////////////////////////////////////////////////////////////////
261
// Implementations for 'cli_formatter'
262

263
/**
 * Creates a formatter that indents every line (including the first) by 4
 * spaces, applies no extra padding to continuation lines, and wraps text at
 * DEFAULT_MAX_COLUMNS columns.
 */
cli_formatter::cli_formatter()
  : m_indent_first(true)           // the first line is indented as well
  , m_ljust(0)                     // no padding on wrapped lines
  , m_columns(DEFAULT_MAX_COLUMNS) // maximum line width
  , m_indentation(4)               // spaces prepended to each line
{
}
50✔
270

271
/**
 * Sets the width passed to `wrap_text` as the maximum line width when
 * formatting; returns `*this` to allow chaining of setters.
 */
cli_formatter&
cli_formatter::set_column_width(size_t value)
{
  this->m_columns = value;

  return *this;
}
278

279
/**
 * Sets the number of spaces used to pad lines produced by wrapping (passed as
 * `ljust` to `wrap_text`); returns `*this` to allow chaining of setters.
 */
cli_formatter&
cli_formatter::set_ljust(size_t value)
{
  this->m_ljust = value;

  return *this;
}
286

287
/**
 * Sets the number of spaces prepended to formatted lines; returns `*this` to
 * allow chaining of setters.
 */
cli_formatter&
cli_formatter::set_indent(size_t value)
{
  this->m_indentation = value;

  return *this;
}
294

295
/**
 * Sets whether or not the first line of a formatted block is indented;
 * returns `*this` to allow chaining of setters.
 */
cli_formatter&
cli_formatter::set_indent_first_line(bool value)
{
  this->m_indent_first = value;

  return *this;
}
302

303
std::string
304
cli_formatter::format(const std::string_view value) const
80✔
305
{
306
  std::ostringstream lines_out;
80✔
307

308
  for (const auto& line : split_lines(value)) {
720✔
309
    const auto block = wrap_text(line, m_columns, m_ljust);
80✔
310

311
    lines_out << join_text(indent(block, m_indentation, m_indent_first), "\n");
400✔
312
  }
160✔
313

314
  return lines_out.str();
160✔
315
}
80✔
316

317
std::string
318
shell_escape(const std::string_view s)
46✔
319
{
320
  if (s.empty()) {
46✔
321
    return "''";
2✔
322
  }
323

324
  for (const auto c : s) {
201✔
325
    // Conservative list of safe values; better safe than sorry
326
    if (!isalnum(c) && c != '_' && c != '.' && c != '/' && c != '-') {
96✔
327
      return log_escape(s);
30✔
328
    }
329
  }
330

331
  return std::string{ s };
30✔
332
}
333

334
/**
 * Returns `s` wrapped in single quotes, with single quotes and backslashes
 * escaped by a backslash, common control characters written as C-style escapes
 * (\b, \f, \n, \r, \t), and any other non-printable byte written as "\x"
 * followed by the byte value in lower-case hexadecimal (without zero-padding).
 */
std::string
log_escape(const std::string_view s)
{
  std::string out;
  out.push_back('\'');

  for (const auto c : s) {
    switch (c) {
      case '\'':
        out.append("\\'");
        break;
      case '\\':
        out.append("\\\\");
        break;
      case '\b':
        out.append("\\b");
        break;
      case '\f':
        out.append("\\f");
        break;
      case '\n':
        out.append("\\n");
        break;
      case '\r':
        out.append("\\r");
        break;
      case '\t':
        out.append("\\t");
        break;
      default: {
        // Bytes >= 0x80 are negative if `char` is signed; passing those to
        // isprint is undefined behavior, and converting them directly to int
        // would sign-extend the value (e.g. "\xffffffe9" instead of "\xe9")
        const auto byte = static_cast<unsigned char>(c);

        if (std::isprint(byte)) {
          out.push_back(c);
        } else {
          std::ostringstream ss;
          ss << "\\x" << std::hex << static_cast<int>(byte);

          out.append(ss.str());
        }
      }
    }
  }

  out.push_back('\'');

  return out;
}
×
380

381
std::string
382
shell_escape_command(const string_vec& values)
2✔
383
{
384
  std::ostringstream ss;
2✔
385
  for (size_t i = 0; i < values.size(); ++i) {
14✔
386
    if (i) {
5✔
387
      ss << ' ';
4✔
388
    }
389

390
    ss << shell_escape(values.at(i));
20✔
391
  }
392

393
  return ss.str();
4✔
394
}
2✔
395

396
/**
 * Returns a copy of `s` in which the characters ', ", &, <, and > have been
 * replaced by the corresponding HTML entities; all other characters are copied
 * unchanged.
 */
std::string
html_escape(std::string_view s)
{
  std::string escaped;

  for (const char ch : s) {
    if (ch == '\'') {
      escaped += "&#39;";
    } else if (ch == '"') {
      escaped += "&quot;";
    } else if (ch == '&') {
      escaped += "&amp;";
    } else if (ch == '<') {
      escaped += "&lt;";
    } else if (ch == '>') {
      escaped += "&gt;";
    } else {
      escaped += ch;
    }
  }

  return escaped;
}
×
425

426
/**
 * Formats `count` as a decimal number with a ',' between each group of three
 * digits, counting from the right; e.g. 1234567 is formatted as "1,234,567".
 */
std::string
format_thousand_sep(size_t count)
{
  std::string digits = std::to_string(count);

  // Walk backwards from the end, inserting a separator before every complete
  // group of three digits that is preceded by at least one more digit
  for (size_t pos = digits.size(); pos > 3;) {
    pos -= 3;
    digits.insert(pos, 1, ',');
  }

  return digits;
}
5✔
446

447
std::string
448
format_rough_number(size_t value, size_t out_digits)
37✔
449
{
450
  AR_REQUIRE(out_digits > 0);
44✔
451
  if (value == 0) {
36✔
452
    return "0";
6✔
453
  }
454

455
  auto rounded = static_cast<double>(value);
33✔
456
  auto in_digits = static_cast<size_t>(std::log10(rounded));
33✔
457
  if (out_digits > in_digits) {
33✔
458
    return std::to_string(value);
5✔
459
  }
460

461
  // Round to desired number of significant digits
462
  const auto tmp = std::pow(10, in_digits - out_digits + 1);
28✔
463
  rounded = std::round(rounded / tmp) * tmp;
28✔
464

465
  // Rounding up may result in the number of digits increasing
466
  in_digits = static_cast<size_t>(std::log10(rounded));
28✔
467

468
  const std::string units = "KMGTP";
56✔
469
  const size_t unit = std::min<size_t>(units.size(), in_digits / 3);
84✔
470
  const double scaled = rounded / std::pow(10.0, unit * 3);
28✔
471
  const size_t precision =
28✔
472
    out_digits - std::min<size_t>(out_digits, in_digits - unit * 3 + 1);
56✔
473

474
  std::ostringstream ss;
28✔
475
  ss << std::fixed << std::setprecision(precision) << scaled;
84✔
476

477
  if (unit) {
28✔
478
    ss << " " << units.at(unit - 1);
26✔
479
  }
480

481
  return ss.str();
28✔
482
}
64✔
483

484
/**
 * Formats `num / denom` in fixed-point notation with `precision` decimals;
 * returns "NA" if `denom` is zero.
 */
std::string
format_fraction(uint64_t num, uint64_t denom, size_t precision)
{
  if (!denom) {
    return "NA";
  }

  std::ostringstream out;
  out << std::fixed << std::setprecision(precision)
      << (static_cast<double>(num) / denom);

  return out.str();
}
498

499
/**
 * Formats `num / denom` as a percentage in fixed-point notation with
 * `precision` decimals, followed by " %"; returns "NA" if `denom` is zero.
 */
std::string
format_percentage(uint64_t num, uint64_t denom, size_t precision)
{
  if (!denom) {
    return "NA";
  }

  // The scaling is performed in floating point, since `num * 100` would
  // silently wrap around for values of `num` above UINT64_MAX / 100
  const double percentage =
    (static_cast<double>(num) * 100.0) / static_cast<double>(denom);

  std::ostringstream ss;
  ss << std::fixed << std::setprecision(precision) << percentage << " %";

  return ss.str();
}
508

509
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc