• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 25088628874-3010

29 Apr 2026 02:55AM UTC coverage: 69.252% (+0.02%) from 69.232%
25088628874-3010

push

github

tstack
[textinput] more hotkeys

231 of 399 new or added lines in 4 files covered. (57.89%)

7 existing lines in 3 files now uncovered.

54316 of 78432 relevant lines covered (69.25%)

565770.39 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.78
/src/base/intern_string.cc
1
/**
2
 * Copyright (c) 2014, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file intern_string.cc
30
 */
31

32
#include <mutex>
33

34
#include "intern_string.hh"
35

36
#include <string.h>
37

38
#include "config.h"
39
#include "fmt/ostream.h"
40
#include "lnav_log.hh"
41
#include "pcrepp/pcre2pp.hh"
42
#include "unictype.h"
43
#include "uniwidth.h"
44
#include "ww898/cp_utf8.hpp"
45
#include "xxHash/xxhash.h"
46

47
// Number of buckets in the intern table.  lookup() reduces a string's hash
// modulo this value to choose the bucket chain to search.
const static int TABLE_SIZE = 4095;
48

49
/**
 * Chained hash table that owns every interned string.
 *
 * Each slot of it_table is the head of a singly-linked list threaded
 * through intern_string::is_next.
 */
struct intern_string::intern_table {
    /** Free every node on every bucket chain. */
    ~intern_table()
    {
        for (auto* head : this->it_table) {
            while (head != nullptr) {
                // Read the link before the node is freed.
                auto* next = head->is_next;

                delete head;
                head = next;
            }
        }
    }

    // Null the buckets explicitly so that an empty table does not depend on
    // how the object was constructed (value- vs. default-initialization).
    intern_string* it_table[TABLE_SIZE]{};
};
66

67
/**
 * @return A handle to the process-wide intern table.  The table is created
 * the first time this function runs and the same handle is handed out on
 * every later call.
 */
intern_table_lifetime
intern_string::get_table_lifetime()
{
    static intern_table_lifetime s_table = std::make_shared<intern_table>();

    return s_table;
}
74

75
unsigned long
76
hash_str(const char* str, size_t len)
27,592,577✔
77
{
78
    return XXH3_64bits(str, len);
27,592,577✔
79
}
80

81
/**
 * Find the interned copy of a string, adding it to the table if needed.
 *
 * @param str The bytes of the string.
 * @param len The number of bytes in str, or -1 to measure str with strlen().
 * @return The table entry whose contents equal the given bytes.
 *
 * The bucket walk and the insertion happen while holding a function-local
 * mutex.
 */
const intern_string*
intern_string::lookup(const char* str, ssize_t len) noexcept
{
    if (len == -1) {
        len = strlen(str);
    }
    // The hash is computed before the lock is taken.
    const auto h = hash_str(str, len) % TABLE_SIZE;

    static std::mutex table_mutex;

    std::lock_guard<std::mutex> lk(table_mutex);
    auto tab = get_table_lifetime();

    for (auto* curr = tab->it_table[h]; curr != nullptr; curr = curr->is_next)
    {
        // Compare with memcmp(), not strncmp(): the length is explicit and
        // the hash covers all `len` bytes, so strings that differ only after
        // an embedded NUL must not be treated as the same entry.
        if (static_cast<ssize_t>(curr->is_str.size()) == len
            && memcmp(curr->is_str.data(), str, len) == 0)
        {
            return curr;
        }
    }

    // Not found: push a new node onto the front of the bucket chain.
    auto* added = new intern_string(str, len);
    added->is_next = tab->it_table[h];
    tab->it_table[h] = added;

    return added;
}
115

116
/**
 * Intern the bytes covered by a string_fragment.
 *
 * @param sf The fragment to intern.
 * @return The result of lookup(const char*, ssize_t) for the fragment's
 * data and length.
 */
const intern_string*
intern_string::lookup(const string_fragment& sf) noexcept
{
    const auto* bytes = sf.data();
    const auto byte_count = sf.length();

    return lookup(bytes, byte_count);
}
121

122
/**
 * Intern the contents of a std::string.
 *
 * @param str The string to intern; its full size() is used.
 * @return The result of lookup(const char*, ssize_t) for the string's
 * buffer and size.
 */
const intern_string*
intern_string::lookup(const std::string& str) noexcept
{
    const auto* bytes = str.c_str();
    const auto byte_count = str.size();

    return lookup(bytes, byte_count);
}
127

128
bool
129
intern_string::startswith(const char* prefix) const
×
130
{
131
    const char* curr = this->is_str.data();
×
132

133
    while (*prefix != '\0' && *prefix == *curr) {
×
134
        prefix += 1;
×
135
        curr += 1;
×
136
    }
137

138
    return *prefix == '\0';
×
139
}
140

141
/**
 * Strip characters from both ends of the fragment.
 *
 * @param tokens NUL-terminated set of characters to remove.
 * @return A copy of this fragment with sf_begin advanced past leading
 * token characters and sf_end pulled back before trailing ones.
 */
string_fragment
string_fragment::trim(const char* tokens) const
{
    // True when `ch` is one of the characters listed in `tokens`.
    const auto is_token = [tokens](const auto ch) {
        for (int lpc = 0; tokens[lpc] != '\0'; lpc++) {
            if (ch == tokens[lpc]) {
                return true;
            }
        }
        return false;
    };

    auto trimmed = *this;

    while (trimmed.sf_begin < trimmed.sf_end
           && is_token(trimmed.sf_string[trimmed.sf_begin]))
    {
        trimmed.sf_begin += 1;
    }
    while (trimmed.sf_begin < trimmed.sf_end
           && is_token(trimmed.sf_string[trimmed.sf_end - 1]))
    {
        trimmed.sf_end -= 1;
    }

    return trimmed;
}
179

180
/**
 * @return This fragment with spaces, tabs, carriage returns, and newlines
 * removed from both ends.
 */
string_fragment
string_fragment::trim() const
{
    static constexpr char WHITESPACE[] = " \t\r\n";

    return this->trim(WHITESPACE);
}
185

186
/**
 * Strip characters from the end of the fragment.
 *
 * @param tokens NUL-terminated set of characters to remove.
 * @return A copy of this fragment with sf_end pulled back before any
 * trailing token characters.
 */
string_fragment
string_fragment::rtrim(const char* tokens) const
{
    // True when `ch` is one of the characters listed in `tokens`.
    const auto is_token = [tokens](const auto ch) {
        for (int lpc = 0; tokens[lpc] != '\0'; lpc++) {
            if (ch == tokens[lpc]) {
                return true;
            }
        }
        return false;
    };

    auto trimmed = *this;

    while (trimmed.sf_begin < trimmed.sf_end
           && is_token(trimmed.sf_string[trimmed.sf_end - 1]))
    {
        trimmed.sf_end -= 1;
    }

    return trimmed;
}
209

210
std::optional<int>
211
string_fragment::rfind(char ch) const
×
212
{
213
    if (this->empty()) {
×
214
        return std::nullopt;
×
215
    }
216

217
    for (auto index = this->sf_end - 1; index >= this->sf_begin; index--) {
×
218
        if (this->sf_string[index] == ch) {
×
219
            return index;
×
220
        }
221
    }
222

223
    return std::nullopt;
×
224
}
225

226
std::optional<string_fragment>
227
string_fragment::consume_n(int amount) const
521✔
228
{
229
    if (amount > this->length()) {
521✔
230
        return std::nullopt;
×
231
    }
232

233
    return string_fragment{
521✔
234
        this->sf_string,
521✔
235
        this->sf_begin + amount,
521✔
236
        this->sf_end,
521✔
237
    };
521✔
238
}
239

240
/**
 * Split the fragment in two at a byte offset.
 *
 * @param amount Requested size of the first piece; values larger than
 * length() are reduced to length().
 * @return The first `amount` bytes followed by the rest of the fragment.
 */
string_fragment::split_n_result
string_fragment::split_n(int amount) const
{
    const auto head_len = std::min(amount, this->length());
    const auto pivot = this->sf_begin + head_len;

    return {
        string_fragment{this->sf_string, this->sf_begin, pivot},
        string_fragment{this->sf_string, pivot, this->sf_end},
    };
}
258

259
std::vector<string_fragment>
260
string_fragment::split_lines() const
423,711✔
261
{
262
    std::vector<string_fragment> retval;
423,711✔
263
    int start = this->sf_begin;
423,711✔
264

265
    for (auto index = start; index < this->sf_end; index++) {
66,539,512✔
266
        if (this->sf_string[index] == '\n') {
66,115,801✔
267
            retval.emplace_back(this->sf_string, start, index + 1);
25,169✔
268
            start = index + 1;
25,169✔
269
        }
270
    }
271
    if (retval.empty() || start < this->sf_end) {
423,711✔
272
        retval.emplace_back(this->sf_string, start, this->sf_end);
423,256✔
273
    }
274

275
    return retval;
847,422✔
276
}
×
277

278
/**
 * Count the UTF-8 sequences in the fragment.
 *
 * @return The number of sequences walked, or the error reported by
 * ww898::utf::utf8::char_size() for the first sequence it rejects.
 */
Result<ssize_t, const char*>
string_fragment::utf8_length() const
{
    ssize_t codepoints = 0;
    ssize_t offset = this->sf_begin;

    while (offset < this->sf_end) {
        // char_size() is given the lead byte and the bytes left from it.
        const auto ch_size = TRY(ww898::utf::utf8::char_size([this, offset]() {
            return std::make_pair(this->sf_string[offset],
                                  this->sf_end - offset);
        }));

        offset += ch_size;
        codepoints += 1;
    }

    return Ok(codepoints);
}
294

295
/**
 * Classify the letter case used by the fragment.
 *
 * The patterns are tried in order: no ASCII upper-case letters -> lower,
 * no ASCII lower-case letters -> upper, one or more capitalized runs ->
 * camel.  Anything else, including a fragment no pattern matches, is mixed.
 */
string_fragment::case_style
string_fragment::detect_text_case_style() const
{
    static const auto LOWER_RE
        = lnav::pcre2pp::code::from_const(R"(^[^A-Z]+$)");
    static const auto UPPER_RE
        = lnav::pcre2pp::code::from_const(R"(^[^a-z]+$)");
    static const auto CAMEL_RE
        = lnav::pcre2pp::code::from_const(R"(^(?:[A-Z][a-z0-9]+)+$)");

    // Match errors are ignored and treated the same as "no match".
    const auto matches = [this](const auto& re) {
        return re.find_in(*this).ignore_error().has_value();
    };

    if (matches(LOWER_RE)) {
        return case_style::lower;
    }
    if (matches(UPPER_RE)) {
        return case_style::upper;
    }
    if (matches(CAMEL_RE)) {
        return case_style::camel;
    }

    return case_style::mixed;
}
317

318
std::string
319
string_fragment::to_string_with_case_style(case_style style) const
241✔
320
{
321
    std::string retval;
241✔
322

323
    switch (style) {
241✔
324
        case case_style::lower: {
41✔
325
            for (auto ch : *this) {
344✔
326
                retval.append(1, std::tolower(ch));
303✔
327
            }
328
            break;
41✔
329
        }
330
        case case_style::upper: {
172✔
331
            for (auto ch : *this) {
969✔
332
                retval.append(1, std::toupper(ch));
797✔
333
            }
334
            break;
172✔
335
        }
336
        case case_style::camel: {
15✔
337
            retval = this->to_string();
15✔
338
            if (!this->empty()) {
15✔
339
                retval[0] = toupper(retval[0]);
15✔
340
            }
341
            break;
15✔
342
        }
343
        case case_style::mixed: {
13✔
344
            return this->to_string();
13✔
345
        }
346
    }
347

348
    return retval;
228✔
349
}
241✔
350

351
uint64_t
352
string_fragment::bloom_bits() const
16,252✔
353
{
354
    auto a = XXH3_64bits(this->data(), this->length());
16,252✔
355
    auto b = a >> 8;
16,252✔
356
    if ((b & 0x3f) == (a & 0x3f)) {
16,252✔
357
        b = b >> 8;
107✔
358
    }
359
    auto c = b >> 8;
16,252✔
360
    if ((c & 0x3f) == (a & 0x3f) || (c & 0x3f) == (b & 0x3f)) {
16,252✔
361
        c = c >> 8;
474✔
362
    }
363

364
    uint64_t retval = 0;
16,252✔
365
    retval |= 1ULL << (a % 56);
16,252✔
366
    retval |= 1ULL << (b % 56);
16,252✔
367
    retval |= 1ULL << (c % 56);
16,252✔
368

369
    return retval;
16,252✔
370
}
371

372
std::string
373
string_fragment::to_unquoted_string() const
1,155✔
374
{
375
    auto sub_sf = *this;
1,155✔
376

377
    if (sub_sf.startswith("r") || sub_sf.startswith("u")) {
1,155✔
378
        sub_sf = sub_sf.consume_n(1).value();
24✔
379
    }
380
    if (sub_sf.length() >= 2
1,155✔
381
        && ((sub_sf.startswith("\"") && sub_sf.endswith("\""))
2,012✔
382
            || (sub_sf.startswith("'") && sub_sf.endswith("'"))))
857✔
383
    {
384
        std::string retval;
314✔
385

386
        sub_sf.sf_begin += 1;
314✔
387
        sub_sf.sf_end -= 1;
314✔
388
        retval.reserve(this->length());
314✔
389

390
        auto in_escape = false;
314✔
391
        for (auto ch : sub_sf) {
2,256✔
392
            if (in_escape) {
1,942✔
393
                switch (ch) {
×
394
                    case 'n':
×
395
                        retval.push_back('\n');
×
396
                        break;
×
397
                    case 't':
×
398
                        retval.push_back('\t');
×
399
                        break;
×
400
                    case 'r':
×
401
                        retval.push_back('\r');
×
402
                        break;
×
403
                    default:
×
404
                        retval.push_back(ch);
×
405
                        break;
×
406
                }
407
                in_escape = false;
×
408
            } else if (ch == '\\') {
1,942✔
409
                in_escape = true;
×
410
            } else {
411
                retval.push_back(ch);
1,942✔
412
            }
413
        }
414

415
        return retval;
314✔
416
    }
314✔
417

418
    return this->to_string();
841✔
419
}
420

421
uint32_t
422
string_fragment::front_codepoint() const
4,571,504✔
423
{
424
    size_t index = 0;
4,571,504✔
425
    auto read_res = ww898::utf::utf8::read(
426
        [this, &index]() { return this->data()[index++]; });
9,143,020✔
427
    if (read_res.isErr()) {
4,571,504✔
428
        return this->data()[0];
×
429
    }
430
    return read_res.unwrap();
4,571,504✔
431
}
4,571,504✔
432

433
/**
 * Convert a code point index into a byte offset from the fragment's start.
 *
 * @param cp_index Number of UTF-8 sequences to skip.
 * @return The byte offset reached after skipping cp_index sequences, or an
 * error if the end of the fragment is reached first or char_size() rejects
 * a sequence.
 *
 * NOTE(review): the remaining-size value given to char_size() here is
 * `length() - offset - 1`, whereas utf8_length() passes the count without
 * the `- 1` -- confirm which one char_size() expects.
 */
Result<ssize_t, const char*>
string_fragment::codepoint_to_byte_index(ssize_t cp_index) const
{
    ssize_t byte_off = 0;

    for (auto remaining = cp_index; remaining > 0; remaining -= 1) {
        if (byte_off >= this->length()) {
            return Err("index is beyond the end of the string");
        }

        const auto ch_len
            = TRY(ww898::utf::utf8::char_size([this, byte_off]() {
                  return std::make_pair(this->data()[byte_off],
                                        this->length() - byte_off - 1);
              }));

        byte_off += ch_len;
    }

    return Ok(byte_off);
}
453

454
/**
 * Select the part of the fragment that covers a range of display cells.
 *
 * @param cell_start The first cell of the range.
 * @param cell_end The cell at which the range stops.
 * @return The result of sub_range() for the byte positions found for the
 * two cells, or an empty fragment if cell_start was never reached.
 *
 * Cells are counted the same way as in column_width(): a tab advances to
 * the next multiple of eight, other characters advance by uc_width()
 * (negative widths count as one), and an undecodable byte counts as one.
 *
 * NOTE(review): the byte positions start from sf_begin and are passed
 * straight to sub_range() -- confirm that is the index space it expects.
 */
string_fragment
string_fragment::sub_cell_range(int cell_start, int cell_end) const
{
    int byte_index = this->sf_begin;
    std::optional<int> byte_start;
    std::optional<int> byte_end;
    int cell_index = 0;

    while (byte_index < this->sf_end) {
        if (cell_start == cell_index) {
            byte_start = byte_index;
        }
        if (!byte_end && cell_index >= cell_end) {
            byte_end = byte_index;
            break;
        }

        auto read_res = ww898::utf::utf8::read(
            [this, &byte_index]() { return this->sf_string[byte_index++]; });
        if (read_res.isErr()) {
            // The read callback has already advanced byte_index past the
            // bad byte, so do not skip another one.  Count the byte as one
            // cell, matching the other column-walking functions.
            cell_index += 1;
            continue;
        }

        const auto ch = read_res.unwrap();

        if (ch == '\t') {
            do {
                cell_index += 1;
            } while (cell_index % 8);
            continue;
        }

        auto wcw_res = uc_width(ch, "UTF-8");
        if (wcw_res < 0) {
            wcw_res = 1;
        }
        cell_index += wcw_res;
    }

    // The range may start or end exactly at the end of the text.
    if (cell_start == cell_index) {
        byte_start = byte_index;
    }
    if (!byte_end) {
        byte_end = byte_index;
    }

    if (byte_start && byte_end) {
        return this->sub_range(byte_start.value(), byte_end.value());
    }

    return string_fragment{};
}
507

508
size_t
509
string_fragment::column_to_byte_index(const size_t col) const
324✔
510
{
511
    auto index = this->sf_begin;
324✔
512
    size_t curr_col = 0;
324✔
513

514
    while (curr_col < col && index < this->sf_end) {
651✔
515
        auto read_res = ww898::utf::utf8::read(
516
            [this, &index]() { return this->sf_string[index++]; });
1,559✔
517
        if (read_res.isErr()) {
327✔
518
            curr_col += 1;
×
519
        } else {
520
            auto ch = read_res.unwrap();
327✔
521

522
            switch (ch) {
327✔
523
                case '\t':
×
524
                    do {
525
                        curr_col += 1;
×
526
                    } while (curr_col % 8);
×
527
                    break;
×
528
                default: {
327✔
529
                    auto wcw_res = uc_width(read_res.unwrap(), "UTF-8");
327✔
530
                    if (wcw_res < 0) {
327✔
531
                        wcw_res = 1;
×
532
                    }
533

534
                    curr_col += wcw_res;
327✔
535
                    break;
327✔
536
                }
537
            }
538
        }
539
    }
327✔
540

541
    return index - this->sf_begin;
324✔
542
}
543

544
size_t
545
string_fragment::byte_to_column_index(const size_t byte_index) const
×
546
{
547
    auto index = this->sf_begin;
×
548
    size_t curr_col = 0;
×
549

550
    while (index < this->sf_end && index < (ssize_t) byte_index) {
×
551
        auto read_res = ww898::utf::utf8::read(
552
            [this, &index]() { return this->sf_string[index++]; });
×
553
        if (read_res.isErr()) {
×
554
            curr_col += 1;
×
555
        } else {
556
            auto ch = read_res.unwrap();
×
557

558
            switch (ch) {
×
559
                case '\t':
×
560
                    do {
561
                        curr_col += 1;
×
562
                    } while (curr_col % 8);
×
563
                    break;
×
564
                default: {
×
565
                    auto wcw_res = uc_width(read_res.unwrap(), "UTF-8");
×
566
                    if (wcw_res < 0) {
×
567
                        wcw_res = 1;
×
568
                    }
569

570
                    curr_col += wcw_res;
×
571
                    break;
×
572
                }
573
            }
574
        }
575
    }
576

577
    return curr_col;
×
578
}
579

580
/** Character categories used when looking for word boundaries. */
enum class word_char_class {
    /** Characters with the Unicode white-space property, and tabs. */
    space,
    /** Letters, numbers, and connector punctuation. */
    word,
    /** Anything that is neither space nor word. */
    symbol,
};
585

586
/**
 * Assign a character to a word_char_class.
 *
 * @param wchar The code point to classify.
 * @return space for white-space characters; word for characters in the L,
 * N, or Pc general categories; symbol otherwise.
 */
static word_char_class
classify_word_char(wchar_t wchar)
{
    static constexpr uint32_t WORD_CATEGORIES
        = UC_CATEGORY_MASK_L | UC_CATEGORY_MASK_N | UC_CATEGORY_MASK_Pc;

    if (uc_is_property_white_space(wchar)) {
        return word_char_class::space;
    }

    return uc_is_general_category_withtable(wchar, WORD_CATEGORIES)
        ? word_char_class::word
        : word_char_class::symbol;
}
599

600
static bool
601
is_word_start(word_char_class curr_class, word_char_class prev_class)
390✔
602
{
603
    if (curr_class == word_char_class::word
390✔
604
        && prev_class != word_char_class::word)
309✔
605
    {
606
        return true;
91✔
607
    }
608
    if (curr_class == word_char_class::symbol
299✔
609
        && prev_class == word_char_class::space)
32✔
610
    {
611
        return true;
10✔
612
    }
613
    return false;
289✔
614
}
615

616
std::optional<int>
617
string_fragment::next_word(const int start_col) const
22✔
618
{
619
    auto index = this->sf_begin;
22✔
620
    int curr_col = 0;
22✔
621
    auto prev_class = word_char_class::space;
22✔
622

623
    while (index < this->sf_end) {
147✔
624
        auto read_res = ww898::utf::utf8::read(
625
            [this, &index]() { return this->sf_string[index++]; });
290✔
626
        if (read_res.isErr()) {
138✔
627
            curr_col += 1;
×
NEW
628
            continue;
×
629
        }
630
        auto ch = read_res.unwrap();
138✔
631

632
        if (ch == '\t') {
138✔
633
            prev_class = word_char_class::space;
1✔
634
            do {
635
                curr_col += 1;
7✔
636
            } while (curr_col % 8);
7✔
637
            continue;
1✔
638
        }
639

640
        auto wcw_res = uc_width(ch, "UTF-8");
137✔
641
        if (wcw_res < 0) {
137✔
NEW
642
            wcw_res = 1;
×
643
        }
644

645
        auto curr_class = classify_word_char(ch);
137✔
646
        if (curr_col > start_col && is_word_start(curr_class, prev_class)) {
137✔
647
            return curr_col;
13✔
648
        }
649
        prev_class = curr_class;
124✔
650
        curr_col += wcw_res;
124✔
651
    }
138✔
652

653
    return std::nullopt;
9✔
654
}
655

656
std::optional<int>
657
string_fragment::prev_word(const int start_col) const
28✔
658
{
659
    auto index = this->sf_begin;
28✔
660
    int curr_col = 0;
28✔
661
    auto prev_class = word_char_class::space;
28✔
662
    std::optional<int> last_word_col;
28✔
663

664
    while (index < this->sf_end) {
161✔
665
        auto read_res = ww898::utf::utf8::read(
666
            [this, &index]() { return this->sf_string[index++]; });
310✔
667
        if (read_res.isErr()) {
148✔
668
            curr_col += 1;
×
NEW
669
            continue;
×
670
        }
671
        auto ch = read_res.unwrap();
148✔
672

673
        if (ch == '\t') {
148✔
674
            if (curr_col >= start_col) {
2✔
NEW
675
                return last_word_col;
×
676
            }
677
            prev_class = word_char_class::space;
2✔
678
            do {
679
                curr_col += 1;
14✔
680
            } while (curr_col % 8);
14✔
681
            continue;
2✔
682
        }
683

684
        if (curr_col >= start_col) {
146✔
685
            return last_word_col;
15✔
686
        }
687

688
        auto wcw_res = uc_width(ch, "UTF-8");
131✔
689
        if (wcw_res < 0) {
131✔
NEW
690
            wcw_res = 1;
×
691
        }
692

693
        auto curr_class = classify_word_char(ch);
131✔
694
        if (is_word_start(curr_class, prev_class)) {
131✔
695
            last_word_col = curr_col;
34✔
696
        }
697
        prev_class = curr_class;
131✔
698
        curr_col += wcw_res;
131✔
699
    }
148✔
700

701
    return last_word_col;
13✔
702
}
703

704
std::optional<int>
705
string_fragment::curr_word(const int start_col) const
44✔
706
{
707
    auto index = this->sf_begin;
44✔
708
    int curr_col = 0;
44✔
709
    auto prev_class = word_char_class::space;
44✔
710
    std::optional<int> last_word_col;
44✔
711

712
    while (index < this->sf_end) {
197✔
713
        auto read_res = ww898::utf::utf8::read(
714
            [this, &index]() { return this->sf_string[index++]; });
422✔
715
        if (read_res.isErr()) {
195✔
NEW
716
            curr_col += 1;
×
NEW
717
            continue;
×
718
        }
719
        auto ch = read_res.unwrap();
195✔
720

721
        if (ch == '\t') {
195✔
NEW
722
            if (curr_col >= start_col) {
×
NEW
723
                return std::nullopt;
×
724
            }
NEW
725
            prev_class = word_char_class::space;
×
726
            do {
NEW
727
                curr_col += 1;
×
NEW
728
            } while (curr_col % 8);
×
NEW
729
            continue;
×
730
        }
731

732
        auto wcw_res = uc_width(ch, "UTF-8");
195✔
733
        if (wcw_res < 0) {
195✔
NEW
734
            wcw_res = 1;
×
735
        }
736

737
        auto curr_class = classify_word_char(ch);
195✔
738

739
        if (start_col < curr_col + wcw_res) {
195✔
740
            if (curr_class == word_char_class::space) {
42✔
741
                return std::nullopt;
8✔
742
            }
743
            if (is_word_start(curr_class, prev_class)) {
34✔
744
                return curr_col;
17✔
745
            }
746
            return last_word_col;
17✔
747
        }
748

749
        if (is_word_start(curr_class, prev_class)) {
153✔
750
            last_word_col = curr_col;
37✔
751
        }
752
        prev_class = curr_class;
153✔
753
        curr_col += wcw_res;
153✔
754
    }
195✔
755

756
    return std::nullopt;
2✔
757
}
758

759
std::string
NEW
760
string_fragment::transform_codepoints(
×
761
    const std::function<uint32_t(uint32_t)>& xform) const
762
{
NEW
763
    std::string out;
×
NEW
764
    out.reserve(this->length());
×
765

NEW
766
    auto index = this->sf_begin;
×
NEW
767
    while (index < this->sf_end) {
×
NEW
768
        auto byte_before = index;
×
769
        auto read_res = ww898::utf::utf8::read(
NEW
770
            [this, &index]() { return this->sf_string[index++]; });
×
NEW
771
        if (read_res.isErr()) {
×
NEW
772
            for (auto j = byte_before; j < index; ++j) {
×
NEW
773
                out.push_back(this->sf_string[j]);
×
774
            }
NEW
775
            continue;
×
776
        }
NEW
777
        auto cp = read_res.unwrap();
×
NEW
778
        auto new_cp = xform(cp);
×
NEW
779
        ww898::utf::utf8::write(
×
NEW
780
            new_cp, [&out](const char b) { out.push_back(b); });
×
781
    }
NEW
782
    return out;
×
NEW
783
}
×
784

785
size_t
786
string_fragment::column_width() const
281,831✔
787
{
788
    auto index = this->sf_begin;
281,831✔
789
    size_t retval = 0;
281,831✔
790

791
    while (index < this->sf_end) {
2,309,236✔
792
        auto read_res = ww898::utf::utf8::read(
793
            [this, &index]() { return this->sf_string[index++]; });
4,113,126✔
794
        if (read_res.isErr()) {
2,027,405✔
795
            retval += 1;
6✔
796
        } else {
797
            auto ch = read_res.unwrap();
2,027,399✔
798

799
            switch (ch) {
2,027,399✔
800
                case '\t':
34,367✔
801
                    do {
802
                        retval += 1;
34,367✔
803
                    } while (retval % 8);
34,367✔
804
                    break;
6,160✔
805
                default: {
2,021,239✔
806
                    auto wcw_res = uc_width(read_res.unwrap(), "UTF-8");
2,021,239✔
807
                    if (wcw_res < 0) {
2,021,239✔
808
                        wcw_res = 1;
2,606✔
809
                    }
810
                    retval += wcw_res;
2,021,239✔
811
                    break;
2,021,239✔
812
                }
813
            }
814
        }
815
    }
2,027,405✔
816

817
    return retval;
281,831✔
818
}
819

820
struct single_producer : string_fragment_producer {
821
    explicit single_producer(const string_fragment& sf) : sp_frag(sf) {}
2,349✔
822

823
    next_result next() override
1,842✔
824
    {
825
        auto retval = std::exchange(this->sp_frag, std::nullopt);
1,842✔
826
        if (retval) {
1,842✔
827
            return retval.value();
921✔
828
        }
829

830
        return eof{};
921✔
831
    }
832

833
    std::optional<string_fragment> sp_frag;
834
};
835

836
std::unique_ptr<string_fragment_producer>
837
string_fragment_producer::from(string_fragment sf)
2,349✔
838
{
839
    return std::make_unique<single_producer>(sf);
2,349✔
840
}
841

842
std::string
843
string_fragment_producer::to_string()
18,194✔
844
{
845
    auto retval = std::string{};
18,194✔
846

847
    retval.reserve(this->estimated_size());
18,194✔
848
    auto for_res = this->for_each(
849
        [&retval](string_fragment sf) -> Result<void, std::string> {
×
850
            retval.append(sf.data(), sf.length());
23,622✔
851
            return Ok();
23,622✔
852
        });
18,194✔
853

854
    return retval;
36,388✔
855
}
18,194✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc