• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

MikkelSchubert / adapterremoval / #36

22 Jul 2024 09:33AM UTC coverage: 87.26% (-12.7%) from 100.0%
#36

push

travis-ci

MikkelSchubert
remove duplicate tests

2185 of 2504 relevant lines covered (87.26%)

16293.15 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

99.09
/src/strutils.cpp
1
/*************************************************************************\
2
 * AdapterRemoval - cleaning next-generation sequencing reads            *
3
 *                                                                       *
4
 * Copyright (C) 2015 by Mikkel Schubert - mikkelsch@gmail.com           *
5
 *                                                                       *
6
 * This program is free software: you can redistribute it and/or modify  *
7
 * it under the terms of the GNU General Public License as published by  *
8
 * the Free Software Foundation, either version 3 of the License, or     *
9
 * (at your option) any later version.                                   *
10
 *                                                                       *
11
 * This program is distributed in the hope that it will be useful,       *
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
14
 * GNU General Public License for more details.                          *
15
 *                                                                       *
16
 * You should have received a copy of the GNU General Public License     *
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. *
18
\*************************************************************************/
19
#include "strutils.hpp" // declarations
20
#include "debug.hpp"    // for AR_REQUIRE
21
#include <algorithm>    // for min, reverse, max
22
#include <cctype>       // for isprint, isalnum, tolower, toupper
23
#include <chrono>       // for system_clock
24
#include <cmath>        // for log10, pow, round
25
#include <cstdint>      // for uint64_t, int64_t
26
#include <iomanip>      // for operator<<, setprecision
27
#include <limits>       // for numeric_limits
28
#include <sstream>      // for ostringstream, operator<<, basic_ostream, bas...
29
#include <stdexcept>    // for invalid_argument
30
#include <unistd.h>     // for STDOUT_FILENO
31
#include <vector>       // for vector, swap
32

33
namespace adapterremoval {
34

35
namespace {
36

37
/**
 * Returns a copy of `lines` in which non-empty lines are prefixed with
 * `n_indent` spaces. The first line is only indented if `indent_first` is
 * true; empty lines are never modified.
 */
string_vec
indent(string_vec lines, size_t n_indent, bool indent_first)
{
  const std::string padding(n_indent, ' ');

  bool is_first = true;
  for (auto& line : lines) {
    // The first line is exempt from indentation unless explicitly requested
    const bool wanted = indent_first || !is_first;
    is_first = false;

    if (wanted && !line.empty()) {
      line.insert(0, padding);
    }
  }

  return lines;
}
85✔
49

50
void
51
join(std::ostringstream& lines_out, string_vec lines)
85✔
52
{
53
  for (auto it = lines.begin(); it != lines.end(); ++it) {
673✔
54
    if (it != lines.begin()) {
333✔
55
      lines_out << "\n";
26✔
56
    }
57

58
    lines_out << *it;
333✔
59
  }
60
}
85✔
61

62
} // namespace
63

64
/**
 * Computes the Levenshtein (edit) distance between `s` and `t`, i.e. the
 * minimum number of single character insertions, deletions, and substitutions
 * needed to turn one string into the other.
 *
 * Only two rows of the dynamic programming table are kept at any time.
 */
size_t
levenshtein(const std::string& s, const std::string& t)
{
  std::vector<size_t> previous(t.size() + 1, 0);
  std::vector<size_t> current(t.size() + 1, 0);

  // Distance from the empty prefix of `s` to every prefix of `t`
  for (size_t col = 0; col < previous.size(); ++col) {
    previous.at(col) = col;
  }

  size_t row = 0;
  for (const char s_char : s) {
    ++row;
    // Distance from the first `row` characters of `s` to the empty string
    current.at(0) = row;

    for (size_t col = 1; col <= t.size(); ++col) {
      const size_t deletion = previous.at(col) + 1;
      const size_t insertion = current.at(col - 1) + 1;
      const size_t mismatch = (s_char == t.at(col - 1)) ? 0 : 1;
      const size_t substitution = previous.at(col - 1) + mismatch;

      current.at(col) = std::min({ deletion, insertion, substitution });
    }

    std::swap(previous, current);
  }

  // After the final swap the most recent row is stored in `previous`
  return previous.back();
}
34✔
90

91
std::string
92
timestamp(const char* format, const bool milliseconds)
3✔
93
{
94
  AR_REQUIRE(format);
3✔
95
  using namespace std::chrono;
3✔
96

97
  const auto now = system_clock::now();
3✔
98
  const auto in_time_t = system_clock::to_time_t(now);
3✔
99

100
  tm in_localtime{};
3✔
101
  std::ostringstream ss;
3✔
102
  ss << std::put_time(localtime_r(&in_time_t, &in_localtime), format);
3✔
103

104
  if (milliseconds) {
3✔
105
    const auto ms =
3✔
106
      duration_cast<std::chrono::milliseconds>(now.time_since_epoch());
6✔
107
    ss << '.' << std::setfill('0') << std::setw(3) << (ms.count() % 1000);
12✔
108
  }
109

110
  return ss.str();
6✔
111
}
3✔
112

113
/**
 * Parses `s` as a non-negative integer that fits in an `unsigned`.
 *
 * Leading and trailing whitespace is accepted.
 *
 * @throws std::invalid_argument if `s` does not start with a number, if the
 *         number is followed by non-whitespace text, or if the value is
 *         negative or larger than the maximum value of `unsigned`.
 */
unsigned
str_to_unsigned(const std::string& s)
{
  std::istringstream input(s);

  // Parsed as a wider, signed type so that negative and too large values can
  // be told apart from malformed input
  int64_t parsed = 0;
  input >> parsed;
  if (input.fail()) {
    throw std::invalid_argument("value is not a valid number");
  }

  // Anything but whitespace following the number is considered an error
  std::string leftovers;
  input >> leftovers;
  if (!input.fail()) {
    throw std::invalid_argument("value contains trailing text");
  }

  const bool too_small = parsed < 0;
  const bool too_large = parsed > std::numeric_limits<unsigned>::max();
  if (too_small || too_large) {
    throw std::invalid_argument("numerical value overflows");
  }

  return static_cast<unsigned>(parsed);
}
38✔
135

136
std::string
137
to_lower(const std::string& str)
14✔
138
{
139
  std::string lowercase = str;
14✔
140
  for (auto& current : lowercase) {
200✔
141
    current = to_lower(current);
56✔
142
  }
143

144
  return lowercase;
14✔
145
}
146

147
std::string
148
to_upper(const std::string& str)
2✔
149
{
150
  std::string uppercase = str;
2✔
151
  for (auto& current : uppercase) {
44✔
152
    current = to_upper(current);
16✔
153
  }
154

155
  return uppercase;
2✔
156
}
157

158
/**
 * Returns true if `str1` ends with `str2`; the comparison is case sensitive.
 * An empty `str2` is a suffix of every string.
 */
bool
ends_with(const std::string& str1, const std::string& str2)
{
  // A suffix can never be longer than the string itself
  if (str2.size() > str1.size()) {
    return false;
  }

  const size_t offset = str1.size() - str2.size();

  return str1.compare(offset, str2.size(), str2) == 0;
}
176

177
/**
 * Splits `text` at every newline character and returns the pieces without
 * the newlines.
 *
 * The result always contains at least one (possibly empty) string, and text
 * ending in a newline yields a trailing empty string.
 */
string_vec
split_lines(const std::string& text)
{
  string_vec lines;

  for (size_t start = 0;;) {
    const auto newline = text.find('\n', start);
    if (newline == std::string::npos) {
      // No more newlines; the remainder of the text is the final line
      lines.push_back(text.substr(start));
      break;
    }

    lines.push_back(text.substr(start, newline - start));
    start = newline + 1;
  }

  return lines;
}
×
194

195
std::string
196
indent_lines(const std::string& lines, size_t indentation)
11✔
197
{
198
  std::ostringstream lines_out;
11✔
199
  join(lines_out, indent(split_lines(lines), indentation, true));
11✔
200

201
  return lines_out.str();
22✔
202
}
11✔
203

204
/**
 * Splits `value` into whitespace separated words and greedily packs these
 * into lines, starting a new line whenever adding a word (and the preceding
 * space) would make the current line exceed `max_width` characters.
 *
 * Every line except the first is prefixed with `ljust` spaces, which count
 * towards the width of that line. A word is never split, so a line holding a
 * single long word may exceed `max_width`. Returns an empty list if `value`
 * contains no words.
 */
string_vec
wrap_text(const std::string& value, size_t max_width, size_t ljust)
{
  string_vec wrapped;
  std::istringstream words(value);

  // Width of the text on the current line, excluding the leading padding
  size_t line_width = 0;
  // Padding of the current line; zero until the text has wrapped once
  size_t line_padding = 0;

  for (std::string word; words >> word;) {
    if (line_width == 0) {
      // The very first word always starts the first line
      wrapped.push_back(word);
      line_width += word.length();
      continue;
    }

    const size_t required = line_padding + line_width + 1 + word.length();
    if (required > max_width) {
      // Word does not fit; start a new, padded line
      line_padding = ljust;
      wrapped.emplace_back(line_padding, ' ');
      wrapped.back().append(word);
      line_width = word.length();
    } else {
      auto& line = wrapped.back();
      line.push_back(' ');
      line.append(word);
      line_width += word.length() + 1;
    }
  }

  return wrapped;
}
109✔
233

234
///////////////////////////////////////////////////////////////////////////////
235
// Implementations for 'cli_formatter'
236

237
/**
 * Creates a formatter with the default settings: the first line is indented,
 * wrapped lines receive no additional padding, text is wrapped at
 * DEFAULT_MAX_COLUMNS, and lines are indented by 4 spaces.
 */
cli_formatter::cli_formatter()
  : m_indent_first(true)
  , m_ljust(0)
  , m_columns(DEFAULT_MAX_COLUMNS)
  , m_indentation(4)
{
}
44✔
244

245
/**
 * Sets the maximum width passed to wrap_text when formatting.
 *
 * @return A reference to this formatter, allowing calls to be chained.
 */
cli_formatter&
cli_formatter::set_column_width(size_t value)
{
  m_columns = value;
  return *this;
}
252

253
/**
 * Sets the number of spaces that wrap_text adds in front of every line
 * following the first line of a wrapped block of text.
 *
 * @return A reference to this formatter, allowing calls to be chained.
 */
cli_formatter&
cli_formatter::set_ljust(size_t value)
{
  m_ljust = value;
  return *this;
}
260

261
/**
 * Sets the number of spaces used to indent formatted lines.
 *
 * @return A reference to this formatter, allowing calls to be chained.
 */
cli_formatter&
cli_formatter::set_indent(size_t value)
{
  m_indentation = value;
  return *this;
}
268

269
/**
 * Sets whether the first line of each formatted block is indented; the
 * remaining lines are always indented.
 *
 * @return A reference to this formatter, allowing calls to be chained.
 */
cli_formatter&
cli_formatter::set_indent_first_line(bool value)
{
  m_indent_first = value;
  return *this;
}
276

277
std::string
278
cli_formatter::format(const std::string& value) const
74✔
279
{
280
  std::ostringstream lines_out;
74✔
281

282
  for (const auto& line : split_lines(value)) {
592✔
283
    const auto block = wrap_text(line, m_columns, m_ljust);
74✔
284

285
    join(lines_out, indent(block, m_indentation, m_indent_first));
74✔
286
  }
148✔
287

288
  return lines_out.str();
148✔
289
}
74✔
290

291
std::string
292
shell_escape(const std::string& s)
46✔
293
{
294
  if (s.empty()) {
92✔
295
    return "''";
2✔
296
  }
297

298
  for (const auto c : s) {
474✔
299
    // Conservative list of safe values; better safe than sorry
300
    if (!isalnum(c) && c != '_' && c != '.' && c != '/' && c != '-') {
96✔
301
      return log_escape(s);
30✔
302
    }
303
  }
304

305
  return s;
15✔
306
}
307

308
/**
 * Returns `s` wrapped in single quotes, with special characters escaped.
 *
 * Single quotes and backslashes are escaped with a backslash, the control
 * characters \b, \f, \n, \r, and \t are written using their C escape
 * sequences, and any other non-printable character is written as "\x"
 * followed by the value of the byte in (unpadded) lowercase hexadecimal.
 */
std::string
log_escape(const std::string& s)
{
  std::string out;
  out.push_back('\'');

  for (const auto c : s) {
    switch (c) {
      case '\'':
        out.append("\\'");
        break;
      case '\\':
        out.append("\\\\");
        break;
      case '\b':
        out.append("\\b");
        break;
      case '\f':
        out.append("\\f");
        break;
      case '\n':
        out.append("\\n");
        break;
      case '\r':
        out.append("\\r");
        break;
      case '\t':
        out.append("\\t");
        break;
      default: {
        // Bytes must be treated as unsigned: passing a negative char to
        // std::isprint is undefined behavior, and converting it directly to
        // int would sign-extend the value (0xE9 printed as "\xffffffe9")
        const auto byte = static_cast<unsigned char>(c);

        if (std::isprint(byte)) {
          out.push_back(c);
        } else {
          std::ostringstream ss;
          ss << "\\x" << std::hex << static_cast<unsigned>(byte);

          out.append(ss.str());
        }
      }
    }
  }

  out.push_back('\'');

  return out;
}
×
354

355
std::string
356
shell_escape_command(const string_vec& values)
2✔
357
{
358
  std::ostringstream ss;
2✔
359
  for (size_t i = 0; i < values.size(); ++i) {
14✔
360
    if (i) {
5✔
361
      ss << ' ';
4✔
362
    }
363

364
    ss << shell_escape(values.at(i));
20✔
365
  }
366

367
  return ss.str();
4✔
368
}
2✔
369

370
/**
 * Returns `count` as a decimal string in which groups of three digits,
 * counted from the right, are separated by commas (e.g. "1,234,567").
 */
std::string
format_thousand_sep(size_t count)
{
  std::string digits = std::to_string(count);

  // Walk backwards in steps of three digits, inserting a separator in front
  // of every complete group that is preceded by at least one more digit
  for (size_t pos = digits.size(); pos > 3;) {
    pos -= 3;
    digits.insert(pos, 1, ',');
  }

  return digits;
}
5✔
390

391
std::string
392
format_rough_number(size_t value, size_t out_digits)
37✔
393
{
394
  AR_REQUIRE(out_digits > 0);
41✔
395
  if (value == 0) {
36✔
396
    return "0";
6✔
397
  }
398

399
  auto rounded = static_cast<double>(value);
33✔
400
  auto in_digits = static_cast<size_t>(std::log10(rounded));
33✔
401
  if (out_digits > in_digits) {
33✔
402
    return std::to_string(value);
5✔
403
  }
404

405
  // Round to desired number of significant digits
406
  const auto tmp = std::pow(10, in_digits - out_digits + 1);
28✔
407
  rounded = std::round(rounded / tmp) * tmp;
28✔
408

409
  // Rounding up may result in the number of digits increasing
410
  in_digits = static_cast<size_t>(std::log10(rounded));
28✔
411

412
  const std::string units = "KMGTP";
56✔
413
  const size_t unit = std::min<size_t>(units.size(), in_digits / 3);
84✔
414
  const double scaled = rounded / std::pow(10.0, unit * 3);
28✔
415
  const size_t precision =
28✔
416
    out_digits - std::min<size_t>(out_digits, in_digits - unit * 3 + 1);
56✔
417

418
  std::ostringstream ss;
28✔
419
  ss << std::fixed << std::setprecision(precision) << scaled;
84✔
420

421
  if (unit) {
28✔
422
    ss << " " << units.at(unit - 1);
52✔
423
  }
424

425
  return ss.str();
28✔
426
}
64✔
427

428
/**
 * Returns `num` / `denom` formatted in fixed-point notation with `precision`
 * decimals, or the string "NA" if `denom` is zero.
 */
std::string
format_fraction(uint64_t num, uint64_t denom, size_t precision)
{
  // The fraction is undefined for a denominator of zero
  if (!denom) {
    return "NA";
  }

  std::ostringstream out;
  out << std::fixed << std::setprecision(precision)
      << (static_cast<double>(num) / denom);

  return out.str();
}
442

443
/**
 * Returns `num` / `denom` as a percentage, formatted in fixed-point notation
 * with `precision` decimals, or the string "NA" if `denom` is zero; this
 * matches the output of format_fraction for 100 times the numerator.
 *
 * The scaling is carried out in floating point, since `num * 100` silently
 * wraps around for large values when calculated using 64-bit integers.
 */
std::string
format_percentage(uint64_t num, uint64_t denom, size_t precision)
{
  // The percentage is undefined for a denominator of zero
  if (!denom) {
    return "NA";
  }

  const double percentage =
    (100.0 * static_cast<double>(num)) / static_cast<double>(denom);

  std::ostringstream ss;
  ss << std::fixed << std::setprecision(precision) << percentage;

  return ss.str();
}
448

449
} // namespace adapterremoval
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc