• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #46

27 Nov 2024 03:10PM UTC coverage: 27.245% (+1.0%) from 26.244%
#46

push

travis-ci

MikkelSchubert
fix convenience executable make target

2609 of 9576 relevant lines covered (27.25%)

4268.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.86
/src/strutils.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2015 by Mikkel Schubert - mikkelsch@gmail.com           *
5
 *                                                                       *
6
 * This program is free software: you can redistribute it and/or modify  *
7
 * it under the terms of the GNU General Public License as published by  *
8
 * the Free Software Foundation, either version 3 of the License, or     *
9
 * (at your option) any later version.                                   *
10
 *                                                                       *
11
 * This program is distributed in the hope that it will be useful,       *
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14
 * GNU General Public License for more details.                          *
15
 *                                                                       *
16
 * You should have received a copy of the GNU General Public License     *
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
18
\*************************************************************************/
19
#include "strutils.hpp" // declarations
20
#include "debug.hpp"    // for AR_REQUIRE
21
#include <algorithm>    // for min, reverse, max
22
#include <cctype>       // for isprint, isalnum, tolower, toupper
23
#include <charconv>     // for from_chars
24
#include <chrono>       // for system_clock
25
#include <cmath>        // for log10, pow, round
26
#include <cstdint>      // for uint64_t, int64_t
27
#include <iomanip>      // for operator<<, setprecision
28
#include <sstream>      // for ostringstream, operator<<, basic_ostream, bas...
29
#include <stdexcept>    // for invalid_argument
30
#include <unistd.h>     // for STDOUT_FILENO
31
#include <vector>       // for vector, swap
32

33
namespace adapterremoval {
34

35
namespace {
36

37
/**
 * Prefixes non-empty lines with `n_indent` space characters.
 *
 * @param lines        Lines to indent; taken by value and returned modified.
 * @param n_indent     Number of spaces inserted at the start of a line.
 * @param indent_first If false, the first element of `lines` is left as-is.
 * @return The (possibly) indented lines; empty lines are never padded.
 */
string_vec
indent(string_vec lines, size_t n_indent, bool indent_first)
{
  const std::string indentation(n_indent, ' ');

  bool is_first = true;
  for (auto& line : lines) {
    // Empty lines are skipped so that no trailing whitespace is produced
    if (!line.empty() && (indent_first || !is_first)) {
      line.insert(0, indentation);
    }

    is_first = false;
  }

  return lines;
}
91✔
49

50
} // namespace
51

52
/**
 * Computes the Levenshtein (edit) distance between `s` and `t`.
 *
 * Uses the two-row formulation: `v0` holds the distances for the previous
 * character of `s` and `v1` the distances being computed for the current one.
 *
 * @return Minimum number of single-character insertions, deletions, and
 *         substitutions needed to turn `s` into `t`.
 */
size_t
levenshtein(const std::string_view s, const std::string_view t)
{
  std::vector<size_t> v0(t.size() + 1, 0);
  std::vector<size_t> v1(t.size() + 1, 0);

  // Distance from the empty prefix of `s` to each prefix of `t`
  for (size_t i = 0; i < v0.size(); ++i) {
    v0.at(i) = i;
  }

  for (size_t i = 0; i < s.size(); ++i) {
    // Distance from `s[0..i]` to the empty prefix of `t`
    v1.at(0) = i + 1;

    for (size_t j = 0; j < t.size(); ++j) {
      const auto del = v0.at(j + 1) + 1;
      const auto ins = v1.at(j) + 1;
      const auto sub = s.at(i) == t.at(j) ? v0.at(j) : v0.at(j) + 1;

      v1.at(j + 1) = std::min(del, std::min(ins, sub));
    }

    // The current row becomes the previous row for the next character
    std::swap(v0, v1);
  }

  return v0.back();
}
34✔
78

79
std::string
80
timestamp(const char* format, const bool milliseconds)
3✔
81
{
82
  AR_REQUIRE(format);
3✔
83
  using namespace std::chrono;
3✔
84

85
  const auto now = system_clock::now();
3✔
86
  const auto in_time_t = system_clock::to_time_t(now);
3✔
87

88
  tm in_localtime{};
3✔
89
  std::ostringstream ss;
3✔
90
  ss << std::put_time(localtime_r(&in_time_t, &in_localtime), format);
3✔
91

92
  if (milliseconds) {
3✔
93
    const auto ms =
3✔
94
      duration_cast<std::chrono::milliseconds>(now.time_since_epoch());
6✔
95
    ss << '.' << std::setfill('0') << std::setw(3) << (ms.count() % 1000);
12✔
96
  }
97

98
  return ss.str();
6✔
99
}
3✔
100

101
namespace {
102

103
template<typename T>
104
inline T
105
str_to(std::string_view s)
56✔
106
{
107
  T value{};
56✔
108

109
  s = trim_ascii_whitespace(s);
56✔
110
  const auto* begin = s.data();
56✔
111
  const auto* end = begin + s.size();
56✔
112
  const auto result = std::from_chars(begin, end, value);
56✔
113

114
  if (result.ec != std::errc{}) {
56✔
115
    throw std::invalid_argument("value is not a valid number");
19✔
116
  } else if (result.ptr != end) {
37✔
117
    throw std::invalid_argument("number contains text");
5✔
118
  }
119

120
  return value;
32✔
121
}
122

10✔
123
} // namespace
124

10✔
125
uint32_t
126
str_to_u32(std::string_view s)
10✔
127
{
10✔
128
  return str_to<uint32_t>(s);
10✔
129
}
10✔
130

131
double
10✔
132
str_to_double(std::string_view s)
5✔
133
{
5✔
134
  return str_to<double>(s);
1✔
135
}
136

137
std::string
4✔
138
to_lower(std::string str)
139
{
46✔
140
  for (auto& current : str) {
141
    current = to_lower(current);
46✔
142
  }
143

46✔
144
  return str;
46✔
145
}
46✔
146

46✔
147
std::string
148
to_upper(std::string str)
46✔
149
{
14✔
150
  for (auto& current : str) {
32✔
151
    current = to_upper(current);
4✔
152
  }
153

154
  return str;
28✔
155
}
156

157
/**
 * Returns true if `str1` begins with `str2`; an empty `str2` always matches.
 */
bool
starts_with(const std::string_view str1, const std::string_view str2)
{
  // substr clamps the length, so a `str1` shorter than `str2` yields a shorter
  // view that compares unequal; no separate size check is required.
  return str1.substr(0, str2.size()) == str2;
}
166

10✔
167
/**
 * Returns true if `str1` ends with `str2`; an empty `str2` always matches.
 */
bool
ends_with(const std::string_view str1, const std::string_view str2)
{
  // The size check must come first; it keeps the subtraction from wrapping
  return str1.size() >= str2.size() &&
         str1.substr(str1.size() - str2.size()) == str2;
}
56✔
176

177
/**
 * Splits `text` at every occurrence of `separator`.
 *
 * Empty fields are preserved, so the result always contains exactly one more
 * element than the number of separators in `text`; empty input therefore
 * yields a single empty string.
 */
string_vec
split_text(const std::string_view text, const char separator)
{
  string_vec lines;

  size_t start = 0;
  while (true) {
    const auto end = text.find(separator, start);

    // When `end` is npos the (very large) length is clamped by substr, and the
    // remainder of the text is taken.
    lines.emplace_back(text.substr(start, end - start));

    if (end == std::string_view::npos) {
      break;
    }

    start = end + 1;
  }

  return lines;
}
194

188✔
195
std::string
196
indent_lines(const std::string_view lines, size_t indentation)
197
{
198
  return join_text(indent(split_lines(lines), indentation, true), "\n");
184✔
199
}
200

201
/**
 * Greedily wraps whitespace-separated words into lines.
 *
 * @param value     Text to wrap; any run of whitespace separates two words.
 * @param max_width A word is moved to a new line if appending it (plus a
 *                  single space) would make the current line, including its
 *                  left padding, longer than this.
 * @param ljust     Number of spaces prepended to every line after the first.
 * @return The wrapped lines; empty if `value` contains no words. Words are
 *         never split, so a line holding a single long word may exceed
 *         `max_width`.
 */
string_vec
wrap_text(const std::string& value, size_t max_width, size_t ljust)
{
  // Width of the current line, excluding the left padding
  size_t current_width = 0;
  // Left padding of the current line; zero for the first line only
  size_t current_ljust = 0;
  std::istringstream lines_in(value);
  string_vec lines_out;

  std::string word;
  while (lines_in >> word) {
    if (!current_width) {
      // Extracted words are never empty, so this is the very first word
      lines_out.push_back(word);
      current_width += word.length();
    } else if (current_ljust + current_width + 1 + word.length() > max_width) {
      // Word does not fit; start a new, left-padded line
      current_ljust = ljust;
      lines_out.emplace_back(current_ljust, ' ');
      lines_out.back().append(word);
      current_width = word.length();
    } else {
      lines_out.back().push_back(' ');
      lines_out.back().append(word);
      current_width += word.length() + 1;
    }
  }

  return lines_out;
}
230

11✔
231
/**
 * Returns a view of `s` without leading and trailing ASCII whitespace (space,
 * form-feed, newline, carriage return, tab, and vertical tab).
 *
 * A default-constructed view is returned if `s` is empty or consists solely
 * of whitespace; otherwise the returned view refers to the same underlying
 * buffer as `s`.
 */
std::string_view
trim_ascii_whitespace(std::string_view s)
{
  constexpr std::string_view whitespace = " \f\n\r\t\v";

  const auto first = s.find_first_not_of(whitespace);
  if (first == std::string_view::npos) {
    return std::string_view{};
  }

  // Cannot be npos, since at least one non-whitespace character was found
  const auto last = s.find_last_not_of(whitespace);

  return s.substr(first, last - first + 1);
}
676✔
247

28✔
248
///////////////////////////////////////////////////////////////////////////////
28✔
249
// Implementations for 'cli_formatter'
28✔
250

56✔
251
/**
 * Creates a formatter that indents the first line, applies no additional
 * padding to wrapped lines, wraps at DEFAULT_MAX_COLUMNS, and indents by 4
 * spaces.
 */
cli_formatter::cli_formatter()
  : m_indent_first(true)
  , m_ljust(0)
  , m_columns(DEFAULT_MAX_COLUMNS)
  , m_indentation(4)
{
}
107✔
258

214✔
259
/** Sets the width passed to `wrap_text` as `max_width`; returns `*this`. */
cli_formatter&
cli_formatter::set_column_width(size_t value)
{
  m_columns = value;

  return *this;
}
266

64✔
267
/** Sets the padding passed to `wrap_text` as `ljust`; returns `*this`. */
cli_formatter&
cli_formatter::set_ljust(size_t value)
{
  m_ljust = value;

  return *this;
}
274

60✔
275
/** Sets the number of spaces used to indent each line; returns `*this`. */
cli_formatter&
cli_formatter::set_indent(size_t value)
{
  m_indentation = value;

  return *this;
}
282

283
/** Sets whether the first line of a block is indented; returns `*this`. */
cli_formatter&
cli_formatter::set_indent_first_line(bool value)
{
  m_indent_first = value;

  return *this;
}
50✔
290

291
std::string
50✔
292
cli_formatter::format(const std::string_view value) const
293
{
294
  std::ostringstream lines_out;
50✔
295

296
  for (const auto& line : split_lines(value)) {
50✔
297
    const auto block = wrap_text(line, m_columns, m_ljust);
298

50✔
299
    lines_out << join_text(indent(block, m_indentation, m_indent_first), "\n");
300
  }
301

302
  return lines_out.str();
32✔
303
}
304

32✔
305
std::string
306
shell_escape(const std::string_view s)
32✔
307
{
308
  if (s.empty()) {
309
    return "''";
310
  }
50✔
311

312
  for (const auto c : s) {
50✔
313
    // Conservative list of safe values; better safe than sorry
314
    if (!isalnum(c) && c != '_' && c != '.' && c != '/' && c != '-') {
50✔
315
      return log_escape(s);
316
    }
317
  }
318

18✔
319
  return std::string{ s };
320
}
18✔
321

322
/**
 * Returns `s` wrapped in single quotes, with special characters escaped.
 *
 * Single quotes and backslashes are prefixed with a backslash, the control
 * characters \b, \f, \n, \r, and \t are written as their C escape sequences,
 * and any other character for which `std::isprint` is false is written as
 * `\x` followed by the lowercase hexadecimal value of the byte (without
 * zero-padding).
 */
std::string
log_escape(const std::string_view s)
{
  constexpr char hex_digits[] = "0123456789abcdef";

  std::string out;
  out.push_back('\'');

  for (const auto c : s) {
    switch (c) {
      case '\'':
        out.append("\\'");
        break;
      case '\\':
        out.append("\\\\");
        break;
      case '\b':
        out.append("\\b");
        break;
      case '\f':
        out.append("\\f");
        break;
      case '\n':
        out.append("\\n");
        break;
      case '\r':
        out.append("\\r");
        break;
      case '\t':
        out.append("\\t");
        break;
      default: {
        // Plain `char` may be signed: passing a negative value to isprint is
        // undefined behavior, and converting it directly to int sign-extends
        // the value (e.g. 0xE9 would be printed as `\xffffffe9`).
        const auto byte = static_cast<unsigned char>(c);

        if (!std::isprint(byte)) {
          out.append("\\x");
          // The high nibble is omitted when zero, i.e. no zero-padding
          if (byte >> 4) {
            out.push_back(hex_digits[byte >> 4]);
          }

          out.push_back(hex_digits[byte & 0xF]);
        } else {
          out.push_back(c);
        }
      }
    }
  }

  out.push_back('\'');

  return out;
}
2✔
368

2✔
369
std::string
370
shell_escape_command(const string_vec& values)
2✔
371
{
2✔
372
  std::ostringstream ss;
373
  for (size_t i = 0; i < values.size(); ++i) {
2✔
374
    if (i) {
2✔
375
      ss << ' ';
376
    }
2✔
377

2✔
378
    ss << shell_escape(values.at(i));
379
  }
2✔
380

2✔
381
  return ss.str();
382
}
2✔
383

2✔
384
/**
 * Returns a copy of `s` in which the characters ', ", &, <, and > have been
 * replaced with the corresponding HTML character references; all other
 * characters are copied unchanged.
 */
std::string
html_escape(std::string_view s)
{
  std::string out;
  // The output is never shorter than the input
  out.reserve(s.size());

  for (const auto c : s) {
    switch (c) {
      case '\'':
        out.append("&#39;");
        break;
      case '"':
        out.append("&quot;");
        break;
      case '&':
        out.append("&amp;");
        break;
      case '<':
        out.append("&lt;");
        break;
      case '>':
        out.append("&gt;");
        break;
      default:
        out.push_back(c);
    }
  }

  return out;
}
20✔
413

414
/**
 * Formats `count` as a decimal number with a comma between each group of
 * three digits, e.g. 1234567 is formatted as "1,234,567".
 */
std::string
format_thousand_sep(size_t count)
{
  if (!count) {
    return "0";
  }

  // Digits are generated least-significant first and reversed afterwards
  std::string ss;
  for (size_t i = 0; count; ++i) {
    ss.push_back(static_cast<char>('0' + (count % 10)));
    count /= 10;

    // Add a separator after every third digit, if more digits follow
    if (count && i % 3 == 2) {
      ss.push_back(',');
    }
  }

  std::reverse(ss.begin(), ss.end());

  return ss;
}
434

1✔
435
std::string
1✔
436
format_rough_number(size_t value, size_t out_digits)
437
{
1✔
438
  AR_REQUIRE(out_digits > 0);
1✔
439
  if (value == 0) {
440
    return "0";
15✔
441
  }
15✔
442

443
  auto rounded = static_cast<double>(value);
444
  auto in_digits = static_cast<size_t>(std::log10(rounded));
445
  if (out_digits > in_digits) {
4✔
446
    return std::to_string(value);
×
447
  }
448

449
  // Round to desired number of significant digits
5✔
450
  const auto tmp = std::pow(10, in_digits - out_digits + 1);
451
  rounded = std::round(rounded / tmp) * tmp;
5✔
452

2✔
453
  // Rounding up may result in the number of digits increasing
454
  in_digits = static_cast<size_t>(std::log10(rounded));
455

4✔
456
  const std::string units = "KMGTP";
24✔
457
  const size_t unit = std::min<size_t>(units.size(), in_digits / 3);
20✔
458
  const double scaled = rounded / std::pow(10.0, unit * 3);
20✔
459
  const size_t precision =
20✔
460
    out_digits - std::min<size_t>(out_digits, in_digits - unit * 3 + 1);
4✔
461

462
  std::ostringstream ss;
463
  ss << std::fixed << std::setprecision(precision) << scaled;
464

12✔
465
  if (unit) {
466
    ss << " " << units.at(unit - 1);
4✔
467
  }
5✔
468

469
  return ss.str();
470
}
37✔
471

472
/**
 * Formats `num / denom` in fixed-point notation with `precision` decimals.
 *
 * @return The formatted fraction, or "NA" if `denom` is zero.
 */
std::string
format_fraction(uint64_t num, uint64_t denom, size_t precision)
{
  if (!denom) {
    return "NA";
  }

  const double fraction = static_cast<double>(num) / static_cast<double>(denom);

  std::ostringstream ss;
  ss << std::fixed << std::setprecision(static_cast<int>(precision))
     << fraction;

  return ss.str();
}
28✔
486

487
/**
 * Formats `num / denom` as a percentage in fixed-point notation with
 * `precision` decimals, followed by " %".
 *
 * @return The formatted percentage, or "NA" if `denom` is zero.
 */
std::string
format_percentage(uint64_t num, uint64_t denom, size_t precision)
{
  if (!denom) {
    return "NA";
  }

  // The numerator is scaled as a floating point value, since `num * 100`
  // silently wraps around for values of `num` greater than UINT64_MAX / 100.
  const double percentage =
    (100.0 * static_cast<double>(num)) / static_cast<double>(denom);

  std::ostringstream ss;
  ss << std::fixed << std::setprecision(static_cast<int>(precision))
     << percentage << " %";

  return ss.str();
}
496

28✔
497
} // namespace adapterremoval
84✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc