• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 11872214087-1756

16 Nov 2024 06:12PM UTC coverage: 70.243% (+0.5%) from 69.712%
11872214087-1756

push

github

tstack
[build] disable regex101

46266 of 65866 relevant lines covered (70.24%)

467515.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

74.3
/src/base/intern_string.cc
1
/**
2
 * Copyright (c) 2014, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file intern_string.cc
30
 */
31

32
#include <mutex>
33

34
#include "intern_string.hh"
35

36
#include <string.h>
37

38
#include "config.h"
39
#include "fmt/ostream.h"
40
#include "pcrepp/pcre2pp.hh"
41
#include "ww898/cp_utf8.hpp"
42
#include "xxHash/xxhash.h"
43

44
/** Number of hash buckets in the intern table. */
static constexpr int TABLE_SIZE = 4095;
45

46
/**
 * Storage for interned strings: a fixed array of TABLE_SIZE buckets, each
 * holding the head of a chain of intern_string nodes linked via is_next.
 */
struct intern_string::intern_table {
    /** Delete every node in every bucket chain. */
    ~intern_table()
    {
        for (auto bucket_head : this->it_table) {
            auto node = bucket_head;

            while (node != nullptr) {
                // Read the successor before the node is destroyed.
                auto following = node->is_next;

                delete node;
                node = following;
            }
        }
    }

    // Zero-initialised so that empty buckets are null no matter how the
    // table object is constructed; the destructor and lookup() both treat
    // nullptr as the end of a chain.
    intern_string* it_table[TABLE_SIZE]{};
};
63

64
/**
 * @return A copy of the handle to the shared intern table.  The table is
 * created with std::make_shared the first time this function is called.
 */
intern_table_lifetime
intern_string::get_table_lifetime()
{
    // Function-local static, so construction happens on first use.
    static intern_table_lifetime table_handle
        = std::make_shared<intern_table>();

    return table_handle;
}
71

72
unsigned long
73
hash_str(const char* str, size_t len)
10,882,238✔
74
{
75
    return XXH3_64bits(str, len);
10,882,238✔
76
}
77

78
/**
 * Find the interned string equal to the given bytes, creating and
 * inserting it if it does not exist yet.
 *
 * @param str The bytes of the string.
 * @param len The number of bytes in str, or -1 to use strlen(str).
 * @return The intern_string node stored in the table for these bytes.
 */
const intern_string*
intern_string::lookup(const char* str, ssize_t len) noexcept
{
    if (len == -1) {
        len = strlen(str);
    }

    // The hash is computed before taking the lock to keep the critical
    // section short.
    const auto bucket = hash_str(str, len) % TABLE_SIZE;

    static std::mutex table_mutex;

    // All reads and writes of the table happen while holding this mutex.
    std::lock_guard<std::mutex> lk(table_mutex);
    auto tab = get_table_lifetime();

    for (auto* curr = tab->it_table[bucket]; curr != nullptr;
         curr = curr->is_next)
    {
        // Compare the full byte range.  memcmp is used instead of strncmp
        // since strncmp stops at the first NUL and would treat distinct
        // keys with an embedded NUL as equal.
        if (static_cast<ssize_t>(curr->is_str.size()) == len
            && (len == 0 || memcmp(curr->is_str.data(), str, len) == 0))
        {
            return curr;
        }
    }

    // Not found: push a new node onto the front of the bucket's chain.
    auto* fresh = new intern_string(str, len);
    fresh->is_next = tab->it_table[bucket];
    tab->it_table[bucket] = fresh;

    return fresh;
}
112

113
/**
 * Overload of lookup() that takes the bytes and length from a
 * string_fragment.
 */
const intern_string*
intern_string::lookup(const string_fragment& sf) noexcept
{
    const auto* bytes = sf.data();
    const auto byte_count = sf.length();

    return lookup(bytes, byte_count);
}
118

119
/**
 * Overload of lookup() that takes the bytes and length from a std::string.
 */
const intern_string*
intern_string::lookup(const std::string& str) noexcept
{
    const char* bytes = str.c_str();
    const auto byte_count = str.size();

    return lookup(bytes, byte_count);
}
124

125
bool
126
intern_string::startswith(const char* prefix) const
×
127
{
128
    const char* curr = this->is_str.data();
×
129

130
    while (*prefix != '\0' && *prefix == *curr) {
×
131
        prefix += 1;
×
132
        curr += 1;
×
133
    }
134

135
    return *prefix == '\0';
×
136
}
137

138
/**
 * Strip characters from both ends of the fragment.
 *
 * @param tokens NUL-terminated list of characters to remove.
 * @return A fragment over the same buffer with sf_begin advanced past
 * leading token characters and sf_end pulled back before trailing ones.
 */
string_fragment
string_fragment::trim(const char* tokens) const
{
    // True when ch is one of the characters listed in tokens.  The scan
    // stops at the terminator, so a NUL in the fragment never matches.
    const auto is_token = [tokens](const auto ch) {
        for (const char* tok = tokens; *tok != '\0'; ++tok) {
            if (ch == *tok) {
                return true;
            }
        }
        return false;
    };

    string_fragment retval = *this;

    // Leading side.
    while (retval.sf_begin < retval.sf_end
           && is_token(retval.sf_string[retval.sf_begin]))
    {
        retval.sf_begin += 1;
    }
    // Trailing side.
    while (retval.sf_begin < retval.sf_end
           && is_token(retval.sf_string[retval.sf_end - 1]))
    {
        retval.sf_end -= 1;
    }

    return retval;
}
176

177
/**
 * Strip spaces, tabs, carriage returns and newlines from both ends of the
 * fragment.
 */
string_fragment
string_fragment::trim() const
{
    static const char WHITESPACE[] = " \t\r\n";

    return this->trim(WHITESPACE);
}
182

183
/**
 * Strip characters from the end of the fragment.
 *
 * @param tokens NUL-terminated list of characters to remove.
 * @return A fragment over the same buffer with sf_end pulled back before
 * any trailing token characters.
 */
string_fragment
string_fragment::rtrim(const char* tokens) const
{
    // True when ch is one of the characters listed in tokens.  The scan
    // stops at the terminator, so a NUL in the fragment never matches.
    const auto is_token = [tokens](const auto ch) {
        for (const char* tok = tokens; *tok != '\0'; ++tok) {
            if (ch == *tok) {
                return true;
            }
        }
        return false;
    };

    string_fragment retval = *this;

    while (retval.sf_begin < retval.sf_end
           && is_token(retval.sf_string[retval.sf_end - 1]))
    {
        retval.sf_end -= 1;
    }

    return retval;
}
206

207
std::optional<string_fragment>
208
string_fragment::consume_n(int amount) const
373✔
209
{
210
    if (amount > this->length()) {
373✔
211
        return std::nullopt;
×
212
    }
213

214
    return string_fragment{
373✔
215
        this->sf_string,
373✔
216
        this->sf_begin + amount,
373✔
217
        this->sf_end,
373✔
218
    };
373✔
219
}
220

221
/**
 * Split the fragment in two at a byte offset.
 *
 * @param amount The number of bytes that go into the first half.
 * @return A pair of (first amount bytes, remaining bytes), or std::nullopt
 * if amount is greater than the fragment's length.
 */
string_fragment::split_result
string_fragment::split_n(int amount) const
{
    if (amount <= this->length()) {
        // Both halves share the original buffer and meet at this index.
        const auto split_at = this->sf_begin + amount;

        return std::make_pair(
            string_fragment{
                this->sf_string,
                this->sf_begin,
                split_at,
            },
            string_fragment{
                this->sf_string,
                split_at,
                this->sf_end,
            });
    }

    return std::nullopt;
}
240

241
std::vector<string_fragment>
242
string_fragment::split_lines() const
121,554✔
243
{
244
    std::vector<string_fragment> retval;
121,554✔
245
    int start = this->sf_begin;
121,554✔
246

247
    for (auto index = start; index < this->sf_end; index++) {
23,675,738✔
248
        if (this->sf_string[index] == '\n') {
23,554,184✔
249
            retval.emplace_back(this->sf_string, start, index + 1);
7,307✔
250
            start = index + 1;
7,307✔
251
        }
252
    }
253
    if (retval.empty() || start < this->sf_end) {
121,554✔
254
        retval.emplace_back(this->sf_string, start, this->sf_end);
121,355✔
255
    }
256

257
    return retval;
243,108✔
258
}
×
259

260
/**
 * Count the UTF-8 encoded characters in the fragment.
 *
 * @return The number of characters, or the error produced by
 * ww898::utf::utf8::char_size() for the first sequence it rejects.
 */
Result<ssize_t, const char*>
string_fragment::utf8_length() const
{
    ssize_t char_count = 0;
    ssize_t offset = this->sf_begin;

    while (offset < this->sf_end) {
        // char_size() is given the lead byte together with the number of
        // bytes left in the fragment starting at that byte.
        auto seq_len = TRY(ww898::utf::utf8::char_size([this, offset]() {
            return std::make_pair(this->sf_string[offset],
                                  this->sf_end - offset);
        }));

        offset += seq_len;
        char_count += 1;
    }

    return Ok(char_count);
}
276

277
/**
 * Classify the letter-casing of the fragment.
 *
 * The checks are tried in order: no ASCII upper-case letters (lower), no
 * ASCII lower-case letters (upper), one or more capitalised words (camel).
 * Anything else, including an empty fragment, is reported as mixed.
 */
string_fragment::case_style
string_fragment::detect_text_case_style() const
{
    static const auto LOWER_RE
        = lnav::pcre2pp::code::from_const(R"(^[^A-Z]+$)");
    static const auto UPPER_RE
        = lnav::pcre2pp::code::from_const(R"(^[^a-z]+$)");
    static const auto CAMEL_RE
        = lnav::pcre2pp::code::from_const(R"(^(?:[A-Z][a-z0-9]+)+$)");

    // A pattern "matches" when find_in() yields a value; errors from the
    // regex engine are treated as no match.
    const auto matches = [this](const auto& re) {
        return re.find_in(*this).ignore_error().has_value();
    };

    if (matches(LOWER_RE)) {
        return case_style::lower;
    }
    if (matches(UPPER_RE)) {
        return case_style::upper;
    }
    if (matches(CAMEL_RE)) {
        return case_style::camel;
    }

    return case_style::mixed;
}
299

300
std::string
301
string_fragment::to_string_with_case_style(case_style style) const
75✔
302
{
303
    std::string retval;
75✔
304

305
    switch (style) {
75✔
306
        case case_style::lower: {
41✔
307
            for (auto ch : *this) {
344✔
308
                retval.append(1, std::tolower(ch));
303✔
309
            }
310
            break;
41✔
311
        }
312
        case case_style::upper: {
3✔
313
            for (auto ch : *this) {
27✔
314
                retval.append(1, std::toupper(ch));
24✔
315
            }
316
            break;
3✔
317
        }
318
        case case_style::camel: {
18✔
319
            retval = this->to_string();
18✔
320
            if (!this->empty()) {
18✔
321
                retval[0] = toupper(retval[0]);
18✔
322
            }
323
            break;
18✔
324
        }
325
        case case_style::mixed: {
13✔
326
            return this->to_string();
13✔
327
        }
328
    }
329

330
    return retval;
62✔
331
}
75✔
332

333
std::string
334
string_fragment::to_unquoted_string() const
775✔
335
{
336
    auto sub_sf = *this;
775✔
337

338
    if (sub_sf.startswith("r") || sub_sf.startswith("u")) {
775✔
339
        sub_sf = sub_sf.consume_n(1).value();
21✔
340
    }
341
    if (sub_sf.length() >= 2
775✔
342
        && ((sub_sf.startswith("\"") && sub_sf.endswith("\""))
1,372✔
343
            || (sub_sf.startswith("'") && sub_sf.endswith("'"))))
597✔
344
    {
345
        std::string retval;
194✔
346

347
        sub_sf.sf_begin += 1;
194✔
348
        sub_sf.sf_end -= 1;
194✔
349
        retval.reserve(this->length());
194✔
350

351
        auto in_escape = false;
194✔
352
        for (auto ch : sub_sf) {
1,423✔
353
            if (in_escape) {
1,229✔
354
                switch (ch) {
×
355
                    case 'n':
×
356
                        retval.push_back('\n');
×
357
                        break;
×
358
                    case 't':
×
359
                        retval.push_back('\t');
×
360
                        break;
×
361
                    case 'r':
×
362
                        retval.push_back('\r');
×
363
                        break;
×
364
                    default:
×
365
                        retval.push_back(ch);
×
366
                        break;
×
367
                }
368
                in_escape = false;
×
369
            } else if (ch == '\\') {
1,229✔
370
                in_escape = true;
×
371
            } else {
372
                retval.push_back(ch);
1,229✔
373
            }
374
        }
375

376
        return retval;
194✔
377
    }
194✔
378

379
    return this->to_string();
581✔
380
}
381

382
uint32_t
383
string_fragment::front_codepoint() const
3,332,484✔
384
{
385
    size_t index = 0;
3,332,484✔
386
    auto read_res = ww898::utf::utf8::read(
387
        [this, &index]() { return this->data()[index++]; });
6,664,976✔
388
    if (read_res.isErr()) {
3,332,484✔
389
        return this->data()[0];
×
390
    }
391
    return read_res.unwrap();
3,332,484✔
392
}
3,332,484✔
393

394
/**
 * Translate a character index into a byte offset within the fragment.
 *
 * @param cp_index The number of UTF-8 characters to step over.
 * @return The byte offset (relative to the start of the fragment) that
 * follows cp_index characters, or an error if the fragment ends first or
 * ww898::utf::utf8::char_size() rejects a sequence.
 */
Result<ssize_t, const char*>
string_fragment::codepoint_to_byte_index(ssize_t cp_index) const
{
    ssize_t byte_off = 0;

    for (; cp_index > 0; cp_index -= 1) {
        if (byte_off >= this->length()) {
            return Err("index is beyond the end of the string");
        }
        // NOTE(review): the count passed here excludes the lead byte,
        // while utf8_length() passes a count that includes it -- confirm
        // which one char_size() expects.
        auto seq_len = TRY(ww898::utf::utf8::char_size([this, byte_off]() {
            return std::make_pair(this->data()[byte_off],
                                  this->length() - byte_off - 1);
        }));

        byte_off += seq_len;
    }

    return Ok(byte_off);
}
414

415
/**
 * Select the part of the fragment that occupies a range of display cells.
 *
 * The fragment is decoded as UTF-8 while tracking the display column.  A
 * tab advances to the next multiple of eight, other characters advance by
 * their wcwidth() (treated as 1 when wcwidth() reports a negative value),
 * and a byte that fails to decode counts as one cell.
 *
 * @param cell_start The column where the range starts.
 * @param cell_end The column where the range ends.
 * @return The result of sub_range() on the matching byte indexes, or an
 * empty fragment if no character starts exactly at cell_start.
 */
string_fragment
string_fragment::sub_cell_range(int cell_start, int cell_end) const
{
    int byte_index = this->sf_begin;
    std::optional<int> byte_start;
    std::optional<int> byte_end;
    int cell_index = 0;

    while (byte_index < this->sf_end) {
        if (cell_start == cell_index) {
            byte_start = byte_index;
        }
        if (!byte_end && cell_index >= cell_end) {
            byte_end = byte_index;
            break;
        }
        // The lambda advances byte_index as it hands out bytes.
        auto read_res = ww898::utf::utf8::read(
            [this, &byte_index]() { return this->sf_string[byte_index++]; });
        if (read_res.isErr()) {
            // The failed read has already moved byte_index forward, so it
            // must not be bumped again here.  The bad byte is counted as
            // one cell, the same way column_width() counts it.
            cell_index += 1;
        } else {
            auto ch = read_res.unwrap();

            switch (ch) {
                case '\t':
                    do {
                        cell_index += 1;
                    } while (cell_index % 8);
                    break;
                default: {
                    auto wcw_res = wcwidth(ch);
                    if (wcw_res < 0) {
                        wcw_res = 1;
                    }
                    cell_index += wcw_res;
                    break;
                }
            }
        }
    }
    // Handle a range that starts and/or ends at the end of the text.
    if (cell_start == cell_index) {
        byte_start = byte_index;
    }
    if (!byte_end) {
        byte_end = byte_index;
    }

    // NOTE(review): the indexes collected above are offsets into
    // sf_string; confirm sub_range() expects that rather than offsets
    // relative to sf_begin.
    if (byte_start && byte_end) {
        return this->sub_range(byte_start.value(), byte_end.value());
    }

    return string_fragment{};
}
468

469
size_t
470
string_fragment::column_width() const
4,963✔
471
{
472
    auto index = this->sf_begin;
4,963✔
473
    size_t retval = 0;
4,963✔
474

475
    while (index < this->sf_end) {
171,073✔
476
        auto read_res = ww898::utf::utf8::read(
477
            [this, &index]() { return this->sf_string[index++]; });
332,684✔
478
        if (read_res.isErr()) {
166,110✔
479
            retval += 1;
×
480
        } else {
481
            auto ch = read_res.unwrap();
166,110✔
482

483
            switch (ch) {
166,110✔
484
                case '\t':
×
485
                    do {
486
                        retval += 1;
×
487
                    } while (retval % 8);
×
488
                    break;
×
489
                default: {
166,110✔
490
                    auto wcw_res = wcwidth(read_res.unwrap());
166,110✔
491
                    if (wcw_res < 0) {
166,110✔
492
                        wcw_res = 1;
1,187✔
493
                    }
494
                    retval += wcw_res;
166,110✔
495
                    break;
166,110✔
496
                }
497
            }
498
        }
499
    }
166,110✔
500

501
    return retval;
4,963✔
502
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc