• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 20245728190-2749

15 Dec 2025 07:59PM UTC coverage: 68.864% (-0.07%) from 68.929%
20245728190-2749

push

github

tstack
[text_format] add plaintext type

Related to #1296

85 of 132 new or added lines in 24 files covered. (64.39%)

73 existing lines in 10 files now uncovered.

51605 of 74938 relevant lines covered (68.86%)

434003.35 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.48
/src/base/string_util.cc
1
/**
2
 * Copyright (c) 2019, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29

30
#include <algorithm>
31
#include <iterator>
32
#include <regex>
33
#include <sstream>
34
#include <string_view>
35

36
#include "string_util.hh"
37

38
#include "config.h"
39
#include "is_utf8.hh"
40
#include "lnav_log.hh"
41
#include "scn/scan.h"
42
#include "unistr.h"
43

44
using namespace std::string_view_literals;
45

46
void
47
scrub_to_utf8(char* buffer, size_t length)
10✔
48
{
49
    size_t index = 0;
10✔
50
    while (index < length) {
170✔
51
        if (buffer[index] > 0) {
160✔
52
            index += 1;
143✔
53
            continue;
143✔
54
        }
55

56
        auto rc = u8_mblen((uint8_t*) &buffer[index], length - index);
17✔
57
        if (rc <= 0) {
17✔
58
            buffer[index] = '?';
17✔
59
        } else {
NEW
60
            index += rc;
×
61
        }
62
    }
63
}
10✔
64

65
void
66
quote_content(auto_buffer& buf, const string_fragment& sf, char quote_char)
×
67
{
NEW
68
    for (const char ch : sf) {
×
69
        if (ch == quote_char) {
×
70
            buf.push_back('\\').push_back(ch);
×
71
            continue;
×
72
        }
73
        switch (ch) {
×
74
            case '\\':
×
75
                buf.push_back('\\').push_back('\\');
×
76
                break;
×
77
            case '\n':
×
78
                buf.push_back('\\').push_back('n');
×
79
                break;
×
80
            case '\t':
×
81
                buf.push_back('\\').push_back('t');
×
82
                break;
×
83
            case '\r':
×
84
                buf.push_back('\\').push_back('r');
×
85
                break;
×
86
            case '\a':
×
87
                buf.push_back('\\').push_back('a');
×
88
                break;
×
89
            case '\b':
×
90
                buf.push_back('\\').push_back('b');
×
91
                break;
×
92
            default:
×
93
                buf.push_back(ch);
×
94
                break;
×
95
        }
96
    }
97
}
98

99
/**
 * Copy quoted content into a destination buffer while undoing the quoting:
 * a quote character consumes the character that follows it (i.e. a doubled
 * quote collapses to one) and backslash escapes are decoded.
 *
 * @param dst The output buffer; a NUL terminator is written after the
 *   content, so it needs room for the result plus one byte.
 * @param str The content between the quotes.
 * @param len The number of bytes in str.
 * @param quote_char The quote character used around the content.
 * @return The number of bytes written to dst, not counting the terminator.
 */
size_t
unquote_content(char* dst, const char* str, size_t len, char quote_char)
{
    size_t out = 0;
    size_t in = 0;

    while (in < len) {
        const char ch = str[in];

        if (ch == quote_char) {
            // Keep the quote and skip whatever follows it.
            dst[out] = ch;
            in += 2;
        } else if (ch == '\\' && (in + 1) < len) {
            const char next = str[in + 1];

            if (next == 'n') {
                dst[out] = '\n';
            } else if (next == 'r') {
                dst[out] = '\r';
            } else if (next == 't') {
                dst[out] = '\t';
            } else {
                // Unknown escape, the escaped character stands for itself.
                dst[out] = next;
            }
            in += 2;
        } else {
            dst[out] = ch;
            in += 1;
        }
        out += 1;
    }
    dst[out] = '\0';

    return out;
}
130

131
size_t
132
unquote(char* dst, const char* str, size_t len)
91✔
133
{
134
    if (str[0] == 'f' || str[0] == 'r' || str[0] == 'u' || str[0] == 'R'
91✔
135
        || str[0] == 'x' || str[0] == 'X')
90✔
136
    {
137
        str += 1;
1✔
138
        len -= 1;
1✔
139
    }
140
    char quote_char = str[0];
91✔
141

142
    require(str[0] == '\'' || str[0] == '"');
91✔
143

144
    return unquote_content(dst, &str[1], len - 2, quote_char);
91✔
145
}
146

147
size_t
148
unquote_w3c(char* dst, const char* str, size_t len)
7✔
149
{
150
    size_t index = 0;
7✔
151

152
    require(str[0] == '\'' || str[0] == '"');
7✔
153

154
    for (size_t lpc = 1; lpc < (len - 1); lpc++, index++) {
514✔
155
        dst[index] = str[lpc];
507✔
156
        if (str[lpc] == '"') {
507✔
157
            lpc += 1;
×
158
        }
159
    }
160
    dst[index] = '\0';
7✔
161

162
    return index;
7✔
163
}
164

165
void
166
truncate_to(std::string& str, size_t max_char_len)
1,065✔
167
{
168
    static const std::string ELLIPSIS = "\u22ef";
1,283✔
169

170
    if (str.length() < max_char_len) {
1,065✔
171
        return;
1,050✔
172
    }
173

174
    auto str_char_len_res = utf8_string_length(str);
30✔
175

176
    if (str_char_len_res.isErr()) {
30✔
177
        // XXX
178
        return;
×
179
    }
180

181
    auto str_char_len = str_char_len_res.unwrap();
30✔
182
    if (str_char_len <= max_char_len) {
30✔
183
        return;
13✔
184
    }
185

186
    if (max_char_len < 3) {
17✔
187
        str = ELLIPSIS;
2✔
188
        return;
2✔
189
    }
190

191
    auto chars_to_remove = (str_char_len - max_char_len) + 1;
15✔
192
    auto midpoint = str_char_len / 2;
15✔
193
    auto chars_to_keep_at_front = midpoint - (chars_to_remove / 2);
15✔
194
    auto bytes_to_keep_at_front
195
        = utf8_char_to_byte_index(str, chars_to_keep_at_front);
15✔
196
    auto remove_up_to_bytes = utf8_char_to_byte_index(
30✔
197
        str, chars_to_keep_at_front + chars_to_remove);
15✔
198
    auto bytes_to_remove = remove_up_to_bytes - bytes_to_keep_at_front;
15✔
199
    str.erase(bytes_to_keep_at_front, bytes_to_remove);
15✔
200
    str.insert(bytes_to_keep_at_front, ELLIPSIS);
15✔
201
}
30✔
202

203
ssize_t
204
utf8_char_to_byte_index(const std::string& str, ssize_t ch_index)
270✔
205
{
206
    ssize_t retval = 0;
270✔
207

208
    while (ch_index > 0) {
12,403✔
209
        auto ch_len
210
            = ww898::utf::utf8::char_size([&str, retval]() {
12,133✔
211
                  return std::make_pair(str[retval], str.length() - retval - 1);
12,133✔
212
              }).unwrapOr(1);
12,133✔
213

214
        retval += ch_len;
12,133✔
215
        ch_index -= 1;
12,133✔
216
    }
217

218
    return retval;
270✔
219
}
220

221
/**
 * Shrink a string in place to the text that follows its last separator
 * ('.', '-', '/', or ':').  Nothing is done if the string is shorter than
 * max_len or contains no separator.
 *
 * @param str The characters to shorten; modified in place, no terminator
 *   is written.
 * @param len The number of bytes in str.
 * @param max_len Strings shorter than this are left alone.
 * @return The new length of the string.
 */
size_t
last_word_str(char* str, size_t len, size_t max_len)
{
    if (len < max_len) {
        return len;
    }

    // Walk backwards until the previous character is a separator, leaving
    // word_start just past the last separator (or at zero if there is none).
    size_t word_start = len;
    while (word_start > 0) {
        const char prev = str[word_start - 1];

        if (prev == '.' || prev == '-' || prev == '/' || prev == ':') {
            break;
        }
        word_start -= 1;
    }

    if (word_start == 0) {
        return len;
    }

    const size_t word_len = len - word_start;
    memmove(&str[0], &str[word_start], word_len);
    return word_len;
}
248

249
/**
 * Abbreviate a separator-delimited name in place by reducing each leading
 * component to its first character, e.g. "com.example.Foo" becomes
 * "c.e.Foo".  The separators are '.', '-', '/', and ':'.  Components are
 * collapsed from left to right and the work stops as soon as the string is
 * shorter than max_len.
 *
 * @param str The characters to abbreviate; modified in place, no terminator
 *   is written.
 * @param len The number of bytes in str.
 * @param max_len Strings shorter than this are left alone.
 * @return The new length of the string, which is never more than len.
 */
size_t
abbreviate_str(char* str, size_t len, size_t max_len)
{
    // Where the next separator should land: right after the first character
    // of the component that is being collapsed.
    size_t last_start = 1;

    if (len < max_len) {
        return len;
    }

    for (size_t index = 0; index < len; index++) {
        switch (str[index]) {
            case '.':
            case '-':
            case '/':
            case ':': {
                // A separator at the very start of the string sits before
                // last_start.  Moving it "back" to last_start would shift the
                // text to the right, write past the end of the string, and
                // grow len.  Leave such a separator where it is.
                const size_t dst = index < last_start ? index : last_start;

                memmove(&str[dst], &str[index], len - index);
                len -= (index - dst);
                // Resume after the first character of the next component.
                index = dst + 1;
                last_start = index + 1;

                if (len < max_len) {
                    return len;
                }
                break;
            }
        }
    }

    return len;
}
278

279
void
280
split_ws(const std::string& str, std::vector<std::string>& toks_out)
739✔
281
{
282
    auto str_sf = string_fragment::from_str(str);
739✔
283

284
    while (true) {
285
        auto split_pair = str_sf.split_when(isspace);
2,899✔
286
        if (split_pair.first.empty()) {
2,899✔
287
            if (split_pair.second.empty()) {
938✔
288
                break;
739✔
289
            }
290
            str_sf = split_pair.second;
199✔
291
            continue;
199✔
292
        }
293

294
        toks_out.emplace_back(split_pair.first.to_string());
1,961✔
295
        str_sf = split_pair.second;
1,961✔
296
    }
2,160✔
297
}
739✔
298

299
/**
 * Build a string made of num back-to-back copies of the input.
 *
 * @param input The string to repeat.
 * @param num The number of copies.
 * @return The concatenated copies; empty if input is empty or num is zero.
 */
std::string
repeat(const std::string& input, size_t num)
{
    std::string retval;

    if (input.empty() || num == 0) {
        return retval;
    }

    // Size the result up-front when the total is representable, so the
    // appends below do not reallocate.
    if (num <= retval.max_size() / input.size()) {
        retval.reserve(input.size() * num);
    }
    for (size_t lpc = 0; lpc < num; lpc++) {
        retval.append(input);
    }

    return retval;
}
1,989✔
306

307
std::string
308
center_str(const std::string& subject, size_t width)
36✔
309
{
310
    std::string retval = subject;
36✔
311

312
    truncate_to(retval, width);
36✔
313

314
    auto retval_length = utf8_string_length(retval).unwrapOr(retval.length());
36✔
315
    auto total_fill = width - retval_length;
36✔
316
    auto before = total_fill / 2;
36✔
317
    auto after = total_fill - before;
36✔
318

319
    retval.insert(0, before, ' ');
36✔
320
    retval.append(after, ' ');
36✔
321

322
    return retval;
36✔
323
}
×
324

325
/**
 * Check whether a string consists solely of whitespace, as classified by
 * isspace().
 *
 * @param str The string to check.
 * @return True if every character is whitespace or the string is empty.
 */
bool
is_blank(const std::string& str)
{
    return std::all_of(str.begin(), str.end(), [](const char ch) {
        // isspace() is only defined for values representable as an unsigned
        // char (or EOF), so bytes >= 0x80 must not be passed as negative
        // values.
        return isspace(static_cast<unsigned char>(ch)) != 0;
    });
}
331

332
/**
 * Copy a string, substituting a visible symbol for each tab, line-feed, and
 * carriage-return.
 *
 * @param in The input characters.
 * @param len The number of bytes to read from in, or -1 to read up to the
 *   first NUL.  Any other negative value yields an empty string.
 * @return The copy with the whitespace characters replaced.
 */
std::string
scrub_ws(const char* in, ssize_t len)
{
    static constexpr auto TAB_SYMBOL = "\u21e5"sv;
    static constexpr auto LF_SYMBOL = "\u240a"sv;
    static constexpr auto CR_SYMBOL = "\u240d"sv;

    std::string retval;

    if (len > 0) {
        retval.reserve(len);
    }

    // With a length of -1 the input is NUL-terminated, otherwise the length
    // is the bound.
    const auto has_more = [in, len](ssize_t pos) -> bool {
        if (len == -1) {
            return in[pos] != '\0';
        }
        return len >= 0 && pos < len;
    };

    for (ssize_t pos = 0; has_more(pos); pos++) {
        const char ch = in[pos];

        if (ch == '\t') {
            retval.append(TAB_SYMBOL);
        } else if (ch == '\n') {
            retval.append(LF_SYMBOL);
        } else if (ch == '\r') {
            retval.append(CR_SYMBOL);
        } else {
            retval.push_back(ch);
        }
    }

    return retval;
}
×
368

369
// The superscript form of each decimal digit, indexed by the digit's value.
static constexpr const char* const SUPERSCRIPT_NUMS[] = {
    "⁰",
    "¹",
    "²",
    "³",
    "⁴",
    "⁵",
    "⁶",
    "⁷",
    "⁸",
    "⁹",
};

/**
 * Replace each ASCII digit in a string with its superscript form.  All other
 * bytes are copied through unchanged.
 *
 * @param in The string to convert.
 * @return The converted copy of the string.
 */
std::string
to_superscript(const std::string& in)
{
    std::string retval;

    for (const auto ch : in) {
        // Use an explicit range check instead of isdigit(), which is
        // undefined for the negative values that plain `char` can hold for
        // bytes >= 0x80.
        if (ch >= '0' && ch <= '9') {
            retval.append(SUPERSCRIPT_NUMS[ch - '0']);
        } else {
            retval.push_back(ch);
        }
    }

    return retval;
}
×
398

399
namespace fmt {
400
auto
401
formatter<lnav::tainted_string>::format(const lnav::tainted_string& ts,
×
402
                                        format_context& ctx)
403
    -> decltype(ctx.out()) const
404
{
405
    auto esc_res = fmt::v10::detail::find_escape(&(*ts.ts_str.begin()),
×
406
                                                 &(*ts.ts_str.end()));
×
407
    if (esc_res.end == nullptr) {
×
408
        return formatter<string_view>::format(ts.ts_str, ctx);
×
409
    }
410

411
    return format_to(ctx.out(), FMT_STRING("{:?}"), ts.ts_str);
×
412
}
413
}  // namespace fmt
414

415
namespace lnav::pcre2pp {
416

417
/**
 * Check if a character is a regular expression metacharacter that must be
 * prefixed with a backslash to be matched literally.
 *
 * @param ch The character to check.
 * @return True if the character needs to be escaped.
 */
static bool
is_meta(char ch)
{
    switch (ch) {
        case '\\':
        case '^':
        case '$':
        case '.':
        case '[':
        case ']':
        case '(':
        case ')':
        case '*':
        case '+':
        case '?':
        case '{':
        case '}':
        // Alternation; left unescaped, quoted text containing a '|' would be
        // treated as a choice between two patterns.
        case '|':
            return true;
        default:
            return false;
    }
}
439

440
/**
 * Look up the backslash escape sequence for a tab or newline.
 *
 * @param ch The character to look up.
 * @return The two-character escape sequence, or std::nullopt if ch is
 *   neither a tab nor a newline.
 */
static std::optional<const char*>
char_escape_seq(char ch)
{
    if (ch == '\t') {
        return "\\t";
    }
    if (ch == '\n') {
        return "\\n";
    }

    return std::nullopt;
}
452

453
std::string
454
quote(string_fragment str)
427,942✔
455
{
456
    std::string retval;
427,942✔
457

458
    while (true) {
459
        auto cp_pair_opt = str.consume_codepoint();
4,042,045✔
460
        if (!cp_pair_opt) {
4,042,045✔
461
            break;
427,942✔
462
        }
463

464
        auto cp_pair = cp_pair_opt.value();
3,614,103✔
465
        if ((cp_pair.first & ~0xff) == 0) {
3,614,103✔
466
            if (is_meta(cp_pair.first)) {
3,614,103✔
467
                retval.push_back('\\');
7,008✔
468
            } else {
469
                auto esc_seq = char_escape_seq(cp_pair.first);
3,607,095✔
470
                if (esc_seq) {
3,607,095✔
471
                    retval.append(esc_seq.value());
×
472
                    str = cp_pair_opt->second;
×
473
                    continue;
×
474
                }
475
            }
476
        }
477
        ww898::utf::utf8::write(cp_pair.first,
3,614,103✔
478
                                [&retval](char ch) { retval.push_back(ch); });
7,228,206✔
479
        str = cp_pair_opt->second;
3,614,103✔
480
    }
3,614,103✔
481

482
    return retval;
427,942✔
483
}
×
484

485
}  // namespace lnav::pcre2pp
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc