• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 25603066199-3032

09 May 2026 02:05PM UTC coverage: 70.204% (+0.04%) from 70.162%
25603066199-3032

push

github

tstack
[tabular] multi-line support

238 of 261 new or added lines in 9 files covered. (91.19%)

10 existing lines in 3 files now uncovered.

57551 of 81977 relevant lines covered (70.2%)

634088.25 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.77
/src/log_format.cc
1
/**
2
 * Copyright (c) 2007-2015, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 */
29

30
#include <algorithm>
31
#include <chrono>
32
#include <memory>
33
#include <vector>
34

35
#include <fnmatch.h>
36
#include <stdio.h>
37
#include <string.h>
38

39
#include "base/fs_util.hh"
40
#include "base/humanize.hh"
41
#include "base/humanize.time.hh"
42
#include "base/intern_string.hh"
43
#include "base/is_utf8.hh"
44
#include "base/itertools.enumerate.hh"
45
#include "base/itertools.hh"
46
#include "base/map_util.hh"
47
#include "base/opt_util.hh"
48
#include "base/separated_string.hh"
49
#include "base/snippet_highlighters.hh"
50
#include "base/string_attr_type.hh"
51
#include "base/string_util.hh"
52
#include "base/time_util.hh"
53
#include "bookmarks.hh"
54
#include "command_executor.hh"
55
#include "config.h"
56
#include "fast_float/single_include/fast_float/fast_float.h"
57
#include "fmt/format.h"
58
#include "lnav_util.hh"
59
#include "log_format_ext.hh"
60
#include "log_search_table.hh"
61
#include "log_vtab_impl.hh"
62
#include "logfile_sub_source.hh"
63
#include "ptimec.hh"
64
#include "readline_highlighters.hh"
65
#include "scn/scan.h"
66
#include "sql_util.hh"
67
#include "sqlite-extension-func.hh"
68
#include "sqlitepp.hh"
69
#include "yajlpp/yajlpp.hh"
70
#include "yajlpp/yajlpp_def.hh"
71

72
using namespace lnav::roles::literals;
73
using namespace std::chrono_literals;
74
using std::string_literals::operator""s;
75

76
static auto intern_lifetime = intern_string::get_table_lifetime();
77

78
constexpr string_attr_type<void> L_PREFIX("prefix");
79
constexpr string_attr_type<void> L_TIMESTAMP("timestamp");
80
constexpr string_attr_type<void> L_LEVEL("level");
81
constexpr string_attr_type<std::shared_ptr<logfile>> L_FILE("file");
82
constexpr string_attr_type<bookmark_metadata*> L_PARTITION("partition");
83
constexpr string_attr_type<void> L_OPID("opid");
84
constexpr string_attr_type<bookmark_metadata*> L_META("meta");
85
constexpr string_attr_type<logfile*> L_METRIC_SOURCE("metric_source");
86

87
std::vector<std::shared_ptr<external_log_format>>
88
    external_log_format::GRAPH_ORDERED_FORMATS;
89

90
const intern_string_t log_format::LOG_TIME_STR
91
    = intern_string::lookup("log_time");
92
const intern_string_t log_format::LOG_LEVEL_STR
93
    = intern_string::lookup("log_level");
94
const intern_string_t log_format::LOG_OPID_STR
95
    = intern_string::lookup("log_opid");
96
const intern_string_t log_format::LOG_THREAD_ID_STR
97
    = intern_string::lookup("log_thread_id");
98
const intern_string_t log_format::LOG_RAW_TEXT_STR
99
    = intern_string::lookup("log_raw_text");
100
const intern_string_t log_format::LOG_EXTRA_FIELDS_STR
101
    = intern_string::lookup("log_extra_fields");
102

103
static constexpr uint32_t DATE_TIME_SET_FLAGS = ETF_YEAR_SET | ETF_MONTH_SET
104
    | ETF_DAY_SET | ETF_HOUR_SET | ETF_MINUTE_SET | ETF_SECOND_SET;
105

106
/**
 * Add the counters of another set of level statistics to this one.
 *
 * @param rhs The statistics whose counts are added.
 * @return A reference to this object.
 */
log_level_stats&
log_level_stats::operator|=(const log_level_stats& rhs)
{
    auto& self = *this;

    self.lls_total_count += rhs.lls_total_count;
    self.lls_warning_count += rhs.lls_warning_count;
    self.lls_error_count += rhs.lls_error_count;

    return self;
}
115

116
/**
 * Merge another operation description into this one.
 *
 * The index is taken from rhs only when this object has none, and the
 * element collection is replaced only when rhs holds more elements.
 *
 * @param rhs The description to merge in.
 * @return A reference to this object.
 */
log_op_description&
log_op_description::operator|=(const log_op_description& rhs)
{
    const bool adopt_index = rhs.lod_index && !this->lod_index;
    if (adopt_index) {
        this->lod_index = rhs.lod_index;
    }

    const bool rhs_has_more
        = this->lod_elements.size() < rhs.lod_elements.size();
    if (rhs_has_more) {
        this->lod_elements = rhs.lod_elements;
    }

    return *this;
}
128

129
void
130
opid_time_range::clear()
×
131
{
132
    this->otr_range.invalidate();
×
133
    this->otr_sub_ops.clear();
×
134
    this->otr_level_stats = {};
×
135
}
136

137
/**
 * Merge another operation's time range, description, statistics and
 * sub-operations into this one.
 *
 * @param rhs The operation data to merge in.
 * @return A reference to this object.
 */
opid_time_range&
opid_time_range::operator|=(const opid_time_range& rhs)
{
    // Compare the begin times before the ranges are merged below, since
    // the merge may change this object's tr_begin.
    const bool rhs_begins_first
        = rhs.otr_range.tr_begin < this->otr_range.tr_begin;

    if (rhs_begins_first) {
        this->otr_description = rhs.otr_description;
    } else {
        this->otr_description |= rhs.otr_description;
    }
    this->otr_range |= rhs.otr_range;
    this->otr_level_stats |= rhs.otr_level_stats;

    for (const auto& incoming : rhs.otr_sub_ops) {
        auto merged = false;

        // Every entry with the same sub-id is widened, not just the first.
        for (auto& existing : this->otr_sub_ops) {
            if (!(existing.ostr_subid == incoming.ostr_subid)) {
                continue;
            }
            existing.ostr_range |= incoming.ostr_range;
            merged = true;
        }
        if (merged) {
            continue;
        }
        this->otr_sub_ops.emplace_back(incoming);
    }
    std::stable_sort(this->otr_sub_ops.begin(), this->otr_sub_ops.end());

    return *this;
}
164

165
void
166
thread_id_time_range::clear()
×
167
{
168
    this->titr_range.invalidate();
×
169
    this->titr_level_stats = {};
×
170
}
171

172
/**
 * Merge another thread's time range and level statistics into this one.
 *
 * @param rhs The thread data to merge in.
 * @return A reference to this object.
 */
thread_id_time_range&
thread_id_time_range::operator|=(const thread_id_time_range& rhs)
{
    auto& self = *this;

    self.titr_level_stats |= rhs.titr_level_stats;
    self.titr_range |= rhs.titr_range;

    return self;
}
180

181
void
182
log_level_stats::update_msg_count(log_level_t lvl, int32_t amount)
44,584✔
183
{
184
    switch (lvl) {
44,584✔
185
        case LEVEL_FATAL:
3,660✔
186
        case LEVEL_CRITICAL:
187
        case LEVEL_ERROR:
188
            this->lls_error_count += amount;
3,660✔
189
            break;
3,660✔
190
        case LEVEL_WARNING:
397✔
191
            this->lls_warning_count += amount;
397✔
192
            break;
397✔
193
        default:
40,527✔
194
            break;
40,527✔
195
    }
196
    this->lls_total_count += amount;
44,584✔
197
}
44,584✔
198

199
void
200
opid_time_range::close_sub_ops(const string_fragment& subid)
×
201
{
202
    for (auto& other_sub : this->otr_sub_ops) {
×
203
        if (other_sub.ostr_subid == subid) {
×
204
            other_sub.ostr_open = false;
×
205
        }
206
    }
207
}
208

209
/**
 * Find the entry for a thread id, creating it if needed.
 *
 * An existing entry has its time range extended to the given time.  A new
 * entry is keyed on a copy of the id made with the supplied allocator and
 * starts with a range of [us, us].
 *
 * @param alloc Allocator used to copy the thread id for a new entry.
 * @param tid The thread id to look up.
 * @param us The timestamp to record for the thread.
 * @return An iterator to the existing or newly inserted entry.
 */
log_thread_id_map::iterator
log_thread_id_state::insert_tid(ArenaAlloc::Alloc<char>& alloc,
                                const string_fragment& tid,
                                const std::chrono::microseconds& us)
{
    auto existing = this->ltis_tid_ranges.find(tid);
    if (existing != this->ltis_tid_ranges.end()) {
        existing->second.titr_range.extend_to(us);
        return existing;
    }

    auto owned_tid = tid.to_owned(alloc);
    auto fresh_range = thread_id_time_range{time_range{us, us}};

    return this->ltis_tid_ranges.emplace(owned_tid, fresh_range).first;
}
226

227
/**
 * Find the entry for an operation id, creating it if needed, and extend
 * its time range.
 *
 * A new entry is keyed on a copy of the id made with the supplied allocator
 * and starts with a range of [us, us]; an existing entry is extended to
 * `us`.  When a positive duration is given, the range is also extended to
 * the other end of the operation.
 *
 * @param alloc Allocator used to copy the opid for a new entry.
 * @param opid The operation id to look up.
 * @param us The timestamp of the message.
 * @param poref Whether `us` marks the start or the end of the operation.
 * @param duration The duration of the operation; ignored unless positive.
 * @return An iterator to the existing or newly inserted entry.
 */
log_opid_map::iterator
log_opid_state::insert_op(ArenaAlloc::Alloc<char>& alloc,
                          const string_fragment& opid,
                          const std::chrono::microseconds& us,
                          timestamp_point_of_reference_t poref,
                          std::chrono::microseconds duration)
{
    auto op_iter = this->los_opid_ranges.find(opid);
    if (op_iter != this->los_opid_ranges.end()) {
        op_iter->second.otr_range.extend_to(us);
    } else {
        auto owned_opid = opid.to_owned(alloc);
        auto fresh_range = opid_time_range{time_range{us, us}};

        op_iter = this->los_opid_ranges.emplace(owned_opid, fresh_range).first;
    }

    if (duration > 0us) {
        auto far_end = us;

        if (poref == timestamp_point_of_reference_t::end) {
            // The timestamp is the end, so the start lies in the past.
            far_end -= duration;
        } else if (poref == timestamp_point_of_reference_t::start) {
            // The timestamp is the start, so the end lies in the future.
            far_end += duration;
        }
        op_iter->second.otr_range.extend_to(far_end);
    }

    return op_iter;
}
258

259
/**
 * Record that a sub-operation id is being used by the given operation.
 *
 * If the sub-id was last associated with a different opid, the matching
 * sub-ops of that other operation are closed first.  The most recently
 * added open sub-op with this id is then extended to `us`; when there is
 * none, a new one with a range of [us, us] is appended.  Either way the
 * sub-op's level statistics are updated for `level`.
 *
 * @param alloc Allocator used to copy the sub-id the first time it is seen.
 * @param op_iter Iterator to the operation that is using the sub-id.
 * @param subid The sub-operation id.
 * @param us The timestamp of the message.
 * @param level The level of the message.
 * @return A pointer to the updated or newly added sub-op, which lives in
 *   the operation's otr_sub_ops container.
 */
opid_sub_time_range*
log_opid_state::sub_op_in_use(ArenaAlloc::Alloc<char>& alloc,
                              log_opid_map::iterator& op_iter,
                              const string_fragment& subid,
                              const std::chrono::microseconds& us,
                              log_level_t level)
{
    const auto& opid = op_iter->first;

    auto in_use_iter = this->los_sub_in_use.find(subid);
    if (in_use_iter == this->los_sub_in_use.end()) {
        in_use_iter
            = this->los_sub_in_use.emplace(subid.to_owned(alloc), opid).first;
    }

    // Use the key stored in the map rather than the caller's fragment.
    auto owned_subid = in_use_iter->first;
    if (in_use_iter->second != opid) {
        auto prev_owner
            = lnav::map::find(this->los_opid_ranges, in_use_iter->second);
        if (prev_owner) {
            prev_owner->get().close_sub_ops(owned_subid);
        }
    }
    in_use_iter->second = opid;

    auto& sub_ops = op_iter->second.otr_sub_ops;
    const auto sub_ops_rend = sub_ops.rend();
    // Search from the back so the most recently added open entry wins.
    auto open_iter = std::find_if(
        sub_ops.rbegin(), sub_ops_rend, [&owned_subid](auto& sub) {
            return sub.ostr_open && sub.ostr_subid == owned_subid;
        });

    if (open_iter != sub_ops_rend) {
        open_iter->ostr_range.extend_to(us);
        open_iter->ostr_level_stats.update_msg_count(level);
        return &(*open_iter);
    }

    sub_ops.emplace_back(opid_sub_time_range{
        owned_subid,
        time_range{us, us},
    });
    auto& added = sub_ops.back();
    added.ostr_level_stats.update_msg_count(level);

    return &added;
}
306

307
std::optional<std::string>
308
log_format::opid_descriptor::matches(const string_fragment& sf) const
2,532✔
309
{
310
    if (this->od_extractor.pp_value) {
2,532✔
311
        thread_local auto desc_md = lnav::pcre2pp::match_data::unitialized();
430✔
312

313
        auto desc_match_res = this->od_extractor.pp_value->capture_from(sf)
430✔
314
                                  .into(desc_md)
430✔
315
                                  .matches(PCRE2_NO_UTF_CHECK | PCRE2_ANCHORED)
860✔
316
                                  .ignore_error();
430✔
317
        if (desc_match_res) {
430✔
318
            return desc_md.to_string();
86✔
319
        }
320

321
        return std::nullopt;
344✔
322
    }
323
    return sf.to_string();
2,102✔
324
}
325

326
std::string
327
log_format::opid_descriptors::to_string(
1,076✔
328
    const lnav::map::small<size_t, std::string>& lod) const
329
{
330
    std::string retval;
1,076✔
331

332
    for (size_t lpc = 0; lpc < this->od_descriptors->size(); lpc++) {
2,199✔
333
        if (this->od_descriptors->at(lpc).od_prefix) {
1,123✔
334
            retval.append(this->od_descriptors->at(lpc).od_prefix.value());
×
335
        } else if (lpc > 0) {
1,123✔
336
            retval.append(" ");
47✔
337
        }
338
        auto val = lod.value_for(lpc);
1,123✔
339
        if (val) {
1,123✔
340
            retval.append(*val.value());
1,121✔
341
        }
342
        retval.append(this->od_descriptors->at(lpc).od_suffix);
1,123✔
343
    }
344

345
    return retval;
1,076✔
346
}
×
347

348
bool
349
logline_value_meta::is_numeric() const
×
350
{
351
    if (this->lvm_identifier || this->lvm_foreign_key) {
×
352
        return false;
×
353
    }
354
    switch (this->lvm_kind) {
×
355
        case value_kind_t::VALUE_FLOAT:
×
356
        case value_kind_t::VALUE_INTEGER:
357
            return true;
×
358
        default:
×
359
            return false;
×
360
    }
361
}
362

363
/**
 * Choose the chart type for this field from its value kind.
 *
 * @return chart_type_t::none for null, any, XML, JSON, boolean and
 *   timestamp kinds; chart_type_t::spectro for floats and for integers
 *   that are neither identifiers nor foreign keys; chart_type_t::hist
 *   for everything else.
 */
chart_type_t
logline_value_meta::to_chart_type() const
{
    switch (this->lvm_kind) {
        case value_kind_t::VALUE_NULL:
        case value_kind_t::VALUE_ANY:
        case value_kind_t::VALUE_XML:
        case value_kind_t::VALUE_JSON:
        case value_kind_t::VALUE_BOOLEAN:
        case value_kind_t::VALUE_TIMESTAMP:
            return chart_type_t::none;

        case value_kind_t::VALUE_FLOAT:
            return chart_type_t::spectro;

        case value_kind_t::VALUE_INTEGER:
            // Identifier-like integers fall back to the histogram type.
            return (this->lvm_identifier || this->lvm_foreign_key)
                ? chart_type_t::hist
                : chart_type_t::spectro;

        default:
            return chart_type_t::hist;
    }
}
392

393
std::string
394
logline_value_meta::to_humanized_value(int64_t i) const
26✔
395
{
396
    if (!this->lvm_unit_suffix.empty()
26✔
397
        || (this->lvm_unit_divisor != 0.0 && this->lvm_unit_divisor != 1.0))
26✔
398
    {
399
        double d = i;
3✔
400
        if (this->lvm_unit_divisor != 0.0 && this->lvm_unit_divisor != 1.0) {
3✔
401
            d /= this->lvm_unit_divisor;
×
402
        }
403
        return humanize::format(d, this->lvm_unit_suffix.to_string_fragment());
3✔
404
    }
405
    return fmt::to_string(i);
23✔
406
}
407

408
std::string
409
logline_value_meta::to_humanized_value(double d) const
30✔
410
{
411
    if (this->lvm_unit_divisor != 0.0 && this->lvm_unit_divisor != 1.0) {
30✔
412
        d /= this->lvm_unit_divisor;
×
413
    }
414
    return humanize::format(d, this->lvm_unit_suffix.to_string_fragment());
30✔
415
}
416

417
struct line_range
418
logline_value::origin_in_full_msg(const char* msg, ssize_t len) const
×
419
{
420
    if (this->lv_sub_offset == 0) {
×
421
        return this->lv_origin;
×
422
    }
423

424
    if (len == -1) {
×
425
        len = strlen(msg);
×
426
    }
427

428
    struct line_range retval = this->lv_origin;
×
429
    const char *last = msg, *msg_end = msg + len;
×
430

431
    for (int lpc = 0; lpc < this->lv_sub_offset; lpc++) {
×
432
        const auto* next = (const char*) memchr(last, '\n', msg_end - last);
×
433
        require(next != nullptr);
×
434

435
        next += 1;
×
436
        int amount = (next - last);
×
437

438
        retval.lr_start += amount;
×
439
        if (retval.lr_end != -1) {
×
440
            retval.lr_end += amount;
×
441
        }
442

443
        last = next + 1;
×
444
    }
445

446
    if (retval.lr_end == -1) {
×
447
        const auto* eol = (const char*) memchr(last, '\n', msg_end - last);
×
448

449
        if (eol == nullptr) {
×
450
            retval.lr_end = len;
×
451
        } else {
452
            retval.lr_end = eol - msg;
×
453
        }
454
    }
455

456
    return retval;
×
457
}
458

459
/**
 * Construct a value from a byte range of a line buffer.
 *
 * If the buffer has no data, the value's kind is forced to VALUE_NULL.
 * Text-like kinds keep a fragment referencing the buffer; integer and
 * float kinds are parsed with scn (0 on parse failure); booleans are 1
 * when the text is exactly "true" or "yes" and 0 otherwise.
 *
 * @param lvm The metadata for the value; its kind selects the parsing.
 * @param sbr The buffer containing the line.
 * @param origin The byte range of the value within the buffer.
 */
logline_value::logline_value(logline_value_meta lvm,
                             shared_buffer_ref& sbr,
                             struct line_range origin)
    : lv_meta(std::move(lvm)), lv_origin(origin)
{
    if (sbr.get_data() == nullptr) {
        this->lv_meta.lvm_kind = value_kind_t::VALUE_NULL;
    }

    switch (this->lv_meta.lvm_kind) {
        case value_kind_t::VALUE_ANY:
        case value_kind_t::VALUE_JSON:
        case value_kind_t::VALUE_XML:
        case value_kind_t::VALUE_STRUCT:
        case value_kind_t::VALUE_TEXT:
        case value_kind_t::VALUE_QUOTED:
        case value_kind_t::VALUE_W3C_QUOTED:
        case value_kind_t::VALUE_TIMESTAMP:
            require(origin.lr_end != -1);
            this->lv_frag = string_fragment::from_byte_range(
                sbr.get_data(), origin.lr_start, origin.lr_end);
            break;

        case value_kind_t::VALUE_NULL:
            break;

        case value_kind_t::VALUE_INTEGER: {
            auto scan_res
                = scn::scan_value<int64_t>(sbr.to_string_view(origin));
            if (scan_res) {
                this->lv_value.i = scan_res->value();
            } else {
                this->lv_value.i = 0;
            }
            break;
        }

        case value_kind_t::VALUE_FLOAT: {
            auto scan_res = scn::scan_value<double>(sbr.to_string_view(origin));
            if (scan_res) {
                this->lv_value.d = scan_res->value();
            } else {
                this->lv_value.d = 0;
            }
            break;
        }

        case value_kind_t::VALUE_BOOLEAN: {
            // Compare the whole value.  A length-limited strncmp() is only
            // a prefix match, which treats an empty value (and "t", "ye",
            // ...) as true.
            const auto bool_text = sbr.to_string_view(origin);

            if (bool_text == "true" || bool_text == "yes") {
                this->lv_value.i = 1;
            } else {
                this->lv_value.i = 0;
            }
            break;
        }

        case value_kind_t::VALUE_UNKNOWN:
        case value_kind_t::VALUE__MAX:
            ensure(0);
            break;
    }
}
644,505✔
526

527
void
528
logline_value::apply_scaling(const scaling_factor* sf)
45,648✔
529
{
530
    if (sf != nullptr) {
45,648✔
531
        switch (this->lv_meta.lvm_kind) {
×
532
            case value_kind_t::VALUE_INTEGER:
×
533
                sf->scale(this->lv_value.i);
×
534
                break;
×
535
            case value_kind_t::VALUE_FLOAT:
×
536
                sf->scale(this->lv_value.d);
×
537
                break;
×
538
            default:
×
539
                break;
×
540
        }
541
    }
542
}
45,648✔
543

544
std::string
545
logline_value::to_string() const
5,147✔
546
{
547
    char buffer[128];
548

549
    switch (this->lv_meta.lvm_kind) {
5,147✔
550
        case value_kind_t::VALUE_NULL:
47✔
551
            return "null";
94✔
552

553
        case value_kind_t::VALUE_ANY:
4,717✔
554
        case value_kind_t::VALUE_JSON:
555
        case value_kind_t::VALUE_XML:
556
        case value_kind_t::VALUE_STRUCT:
557
        case value_kind_t::VALUE_TEXT:
558
        case value_kind_t::VALUE_TIMESTAMP:
559
            if (this->lv_str) {
4,717✔
560
                return this->lv_str.value();
688✔
561
            }
562
            if (this->lv_frag.empty()) {
4,029✔
563
                return this->lv_intern_string.to_string();
45✔
564
            }
565
            return this->lv_frag.to_string();
3,984✔
566

567
        case value_kind_t::VALUE_QUOTED:
7✔
568
        case value_kind_t::VALUE_W3C_QUOTED:
569
            if (this->lv_frag.empty()) {
7✔
570
                return "";
×
571
            } else {
572
                switch (this->lv_frag.data()[0]) {
7✔
573
                    case '\'':
7✔
574
                    case '"': {
575
                        auto unquote_func = this->lv_meta.lvm_kind
14✔
576
                                == value_kind_t::VALUE_W3C_QUOTED
577
                            ? unquote_w3c
7✔
578
                            : unquote;
579
                        stack_buf allocator;
7✔
580
                        auto* unquoted_str
581
                            = allocator.allocate(this->lv_frag.length());
7✔
582
                        size_t unquoted_len;
583

584
                        unquoted_len = unquote_func(unquoted_str,
7✔
585
                                                    this->lv_frag.data(),
586
                                                    this->lv_frag.length());
7✔
587
                        return {unquoted_str, unquoted_len};
14✔
588
                    }
7✔
589
                    default:
×
590
                        return this->lv_frag.to_string();
×
591
                }
592
            }
593
            break;
594

595
        case value_kind_t::VALUE_INTEGER:
334✔
596
            snprintf(buffer, sizeof(buffer), "%" PRId64, this->lv_value.i);
334✔
597
            break;
334✔
598

599
        case value_kind_t::VALUE_FLOAT:
40✔
600
            snprintf(buffer, sizeof(buffer), "%lf", this->lv_value.d);
40✔
601
            break;
40✔
602

603
        case value_kind_t::VALUE_BOOLEAN:
2✔
604
            if (this->lv_value.i) {
2✔
605
                return "true";
×
606
            } else {
607
                return "false";
4✔
608
            }
609
            break;
610
        case value_kind_t::VALUE_UNKNOWN:
×
611
        case value_kind_t::VALUE__MAX:
612
            ensure(0);
×
613
            break;
614
    }
615

616
    return {buffer};
748✔
617
}
618

619
std::string
620
logline_value::to_humanized_string() const
74✔
621
{
622
    switch (this->lv_meta.lvm_kind) {
74✔
623
        case value_kind_t::VALUE_INTEGER:
26✔
624
            return this->lv_meta.to_humanized_value(this->lv_value.i);
26✔
625
        case value_kind_t::VALUE_FLOAT:
4✔
626
            return this->lv_meta.to_humanized_value(this->lv_value.d);
4✔
627
        default:
44✔
628
            return this->to_string();
44✔
629
    }
630
}
631

632
/**
 * Convert the value to a string fragment.
 *
 * Follows the same per-kind rules as to_string().  The "null", "true" and
 * "false" results are literal fragments and an empty quoted value yields a
 * default-constructed fragment; every other result is copied with the
 * supplied allocator.
 *
 * @param alloc Allocator used for the copied text.
 * @return The fragment form of the value.
 */
string_fragment
logline_value::to_string_fragment(ArenaAlloc::Alloc<char>& alloc) const
{
    // "%lf" of a large double needs up to ~317 characters (sign, 309
    // integer digits, '.', six decimals), so a 128-byte buffer would
    // silently truncate such values.
    char buffer[384];

    // Keep the buffer a valid (empty) C string for the fall-through return
    // in case the ensure() below does not abort.
    buffer[0] = '\0';

    switch (this->lv_meta.lvm_kind) {
        case value_kind_t::VALUE_NULL:
            return "null"_frag;

        case value_kind_t::VALUE_ANY:
        case value_kind_t::VALUE_JSON:
        case value_kind_t::VALUE_XML:
        case value_kind_t::VALUE_STRUCT:
        case value_kind_t::VALUE_TEXT:
        case value_kind_t::VALUE_TIMESTAMP:
            if (this->lv_str) {
                return string_fragment::from_str(this->lv_str.value())
                    .to_owned(alloc);
            }
            if (this->lv_frag.empty()) {
                return this->lv_intern_string.to_string_fragment().to_owned(
                    alloc);
            }
            return this->lv_frag.to_owned(alloc);

        case value_kind_t::VALUE_QUOTED:
        case value_kind_t::VALUE_W3C_QUOTED:
            if (this->lv_frag.empty()) {
                return string_fragment{};
            } else {
                switch (this->lv_frag.data()[0]) {
                    case '\'':
                    case '"': {
                        auto unquote_func = this->lv_meta.lvm_kind
                                == value_kind_t::VALUE_W3C_QUOTED
                            ? unquote_w3c
                            : unquote;
                        stack_buf allocator;
                        // The destination buffer is as large as the quoted
                        // input.
                        auto* unquoted_str
                            = allocator.allocate(this->lv_frag.length());
                        size_t unquoted_len;

                        unquoted_len = unquote_func(unquoted_str,
                                                    this->lv_frag.data(),
                                                    this->lv_frag.length());
                        // Copy out of the stack buffer before it goes away.
                        return string_fragment::from_bytes(unquoted_str,
                                                           unquoted_len)
                            .to_owned(alloc);
                    }
                    default:
                        return this->lv_frag.to_owned(alloc);
                }
            }
            break;

        case value_kind_t::VALUE_INTEGER:
            snprintf(buffer, sizeof(buffer), "%" PRId64, this->lv_value.i);
            break;

        case value_kind_t::VALUE_FLOAT:
            snprintf(buffer, sizeof(buffer), "%lf", this->lv_value.d);
            break;

        case value_kind_t::VALUE_BOOLEAN:
            if (this->lv_value.i) {
                return "true"_frag;
            }
            return "false"_frag;
            break;
        case value_kind_t::VALUE_UNKNOWN:
        case value_kind_t::VALUE__MAX:
            ensure(0);
            break;
    }

    return string_fragment::from_c_str(buffer).to_owned(alloc);
}
709

710
const char*
711
logline_value::text_value() const
68,837✔
712
{
713
    if (this->lv_str) {
68,837✔
714
        return this->lv_str->c_str();
351✔
715
    }
716
    if (this->lv_frag.empty()) {
68,486✔
717
        if (this->lv_intern_string.empty()) {
21✔
718
            return "";
21✔
719
        }
720
        return this->lv_intern_string.get();
×
721
    }
722
    return this->lv_frag.data();
68,465✔
723
}
724

725
size_t
726
logline_value::text_length() const
68,870✔
727
{
728
    if (this->lv_str) {
68,870✔
729
        return this->lv_str->size();
351✔
730
    }
731
    if (this->lv_frag.empty()) {
68,519✔
732
        return this->lv_intern_string.size();
21✔
733
    }
734
    return this->lv_frag.length();
68,498✔
735
}
736

737
/**
 * @return A fragment built from text_value() and text_length().
 */
string_fragment
logline_value::text_value_fragment() const
{
    const auto* text = this->text_value();
    const auto text_len = this->text_length();

    return string_fragment::from_bytes(text, text_len);
}
742

743
void
744
logline_value_vector::shift_origins_by(const line_range& cover, int32_t amount)
2✔
745
{
746
    for (auto& lv : this->lvv_values) {
14✔
747
        if (!lv.lv_origin.is_valid()) {
12✔
748
            continue;
×
749
        }
750
        lv.lv_origin.shift_range(cover, amount);
12✔
751
    }
752
}
2✔
753

754
void
755
logline_value_vector::clear()
42,322✔
756
{
757
    this->lvv_values.clear();
42,322✔
758
    this->lvv_sbr.disown();
42,322✔
759
    this->lvv_time_value = std::nullopt;
42,322✔
760
    this->lvv_time_exttm = std::nullopt;
42,322✔
761
    this->lvv_opid_value = std::nullopt;
42,322✔
762
    this->lvv_opid_provenance = opid_provenance::none;
42,322✔
763
    this->lvv_thread_id_value = std::nullopt;
42,322✔
764
    this->lvv_src_file_value = std::nullopt;
42,322✔
765
    this->lvv_src_line_value = std::nullopt;
42,322✔
766
    this->lvv_duration_value = std::nullopt;
42,322✔
767
}
42,322✔
768

769
/**
 * Copy-construct a value vector.
 *
 * The line buffer is cloned.  The thread id and source file/line values
 * are copied with to_owned() using this object's own allocator; the
 * remaining members are copied directly.
 *
 * NOTE(review): the to_owned() initializers use this->lvv_allocator, so
 * that member presumably has to be declared before them in the class --
 * confirm against the header.
 *
 * @param other The vector to copy.
 */
logline_value_vector::logline_value_vector(const logline_value_vector& other)
    : lvv_sbr(other.lvv_sbr.clone()),
      lvv_values(other.lvv_values),
      lvv_time_value(other.lvv_time_value),
      lvv_time_exttm(other.lvv_time_exttm),
      lvv_opid_value(other.lvv_opid_value),
      lvv_opid_provenance(other.lvv_opid_provenance),
      lvv_thread_id_value(
          to_owned(other.lvv_thread_id_value, this->lvv_allocator)),
      lvv_src_file_value(
          to_owned(other.lvv_src_file_value, this->lvv_allocator)),
      lvv_src_line_value(
          to_owned(other.lvv_src_line_value, this->lvv_allocator)),
      lvv_duration_value(other.lvv_duration_value)
{
}
2,017✔
784

785
/**
 * Copy-assign a value vector.
 *
 * The line buffer is cloned.  The thread id and source file/line values
 * are copied with to_owned() using this object's own allocator; the
 * remaining members are assigned directly.
 *
 * @param other The vector to copy.
 * @return A reference to this object.
 */
logline_value_vector&
logline_value_vector::operator=(const logline_value_vector& other)
{
    auto& arena = this->lvv_allocator;

    this->lvv_sbr = other.lvv_sbr.clone();
    this->lvv_values = other.lvv_values;
    this->lvv_time_value = other.lvv_time_value;
    this->lvv_time_exttm = other.lvv_time_exttm;
    this->lvv_opid_value = other.lvv_opid_value;
    this->lvv_opid_provenance = other.lvv_opid_provenance;

    // These are re-owned through this object's allocator.
    this->lvv_thread_id_value = to_owned(other.lvv_thread_id_value, arena);
    this->lvv_src_file_value = to_owned(other.lvv_src_file_value, arena);
    this->lvv_src_line_value = to_owned(other.lvv_src_line_value, arena);

    this->lvv_duration_value = other.lvv_duration_value;

    return *this;
}
804

805
// Storage for the static list handed out by log_format::get_root_formats().
std::vector<std::shared_ptr<log_format>> log_format::lf_root_formats;
806

807
/**
 * Create a new date_time_scanner configured from this format's scanner.
 *
 * The base time and the zoned-to-local flag are always carried over; the
 * default zone is carried over only when this format has one set.
 *
 * @return The newly built scanner.
 */
date_time_scanner
log_format::build_time_scanner() const
{
    const auto& source = this->lf_date_time;
    date_time_scanner retval;

    retval.set_base_time(source.dts_base_time, source.dts_base_tm.et_tm);
    if (source.dts_default_zone != nullptr) {
        retval.dts_default_zone = source.dts_default_zone;
    }
    retval.dts_zoned_to_local = source.dts_zoned_to_local;

    return retval;
}
821

822
std::vector<std::shared_ptr<log_format>>&
823
log_format::get_root_formats()
21,002✔
824
{
825
    return lf_root_formats;
21,002✔
826
}
827

828
void
829
external_log_format::update_op_description(
5,366✔
830
    const std::vector<opid_descriptors*>& desc_defs_vec,
831
    log_op_description& lod,
832
    const pattern* fpat,
833
    const lnav::pcre2pp::match_data& md)
834
{
835
    std::optional<std::string> desc_elem_str;
5,366✔
836
    if (!lod.lod_index) {
5,366✔
837
        for (const auto& desc_defs : desc_defs_vec) {
3,579✔
838
            if (lod.lod_index) {
1,465✔
839
                break;
34✔
840
            }
841
            for (const auto& desc_def : *desc_defs->od_descriptors) {
1,827✔
842
                auto desc_field_index_iter = fpat->p_value_name_to_index.find(
1,557✔
843
                    desc_def.od_field.pp_value);
1,557✔
844

845
                if (desc_field_index_iter == fpat->p_value_name_to_index.end())
1,557✔
846
                {
847
                    continue;
55✔
848
                }
849

850
                auto desc_cap_opt = md[desc_field_index_iter->second];
1,553✔
851
                if (!desc_cap_opt) {
1,553✔
852
                    continue;
51✔
853
                }
854

855
                desc_elem_str = desc_def.matches(desc_cap_opt.value());
1,502✔
856
                if (desc_elem_str) {
1,502✔
857
                    lod.lod_index = desc_defs->od_index;
1,161✔
858
                    break;
1,161✔
859
                }
860
            }
861
        }
862
    }
863
    if (lod.lod_index) {
5,366✔
864
        const auto& desc_def_v
865
            = *desc_defs_vec[lod.lod_index.value()]->od_descriptors;
4,379✔
866
        auto& desc_v = lod.lod_elements;
4,379✔
867

868
        if (desc_def_v.size() == desc_v.size()
4,379✔
869
            || (this->elf_opid_field.empty() && !desc_v.empty()))
4,379✔
870
        {
871
            return;
3,218✔
872
        }
873
        for (size_t desc_def_index = 0; desc_def_index < desc_def_v.size();
3,314✔
874
             desc_def_index++)
875
        {
876
            const auto& desc_def = desc_def_v[desc_def_index];
2,153✔
877
            auto found_desc = desc_v.value_for(desc_def_index);
2,153✔
878
            auto desc_field_index_iter
879
                = fpat->p_value_name_to_index.find(desc_def.od_field.pp_value);
2,153✔
880

881
            if (desc_field_index_iter == fpat->p_value_name_to_index.end()) {
2,153✔
882
                continue;
37✔
883
            }
884
            auto desc_cap_opt = md[desc_field_index_iter->second];
2,153✔
885
            if (!desc_cap_opt) {
2,153✔
886
                continue;
37✔
887
            }
888

889
            if (!desc_elem_str) {
2,116✔
890
                desc_elem_str = desc_def.matches(desc_cap_opt.value());
955✔
891
            }
892
            if (desc_elem_str) {
2,116✔
893
                if (!found_desc) {
2,113✔
894
                    desc_v.insert(desc_def_index, desc_elem_str.value());
2,113✔
895
                } else if (!desc_elem_str->empty()) {
×
896
                    found_desc.value()->append(desc_def.od_joiner);
×
897
                    found_desc.value()->append(desc_elem_str.value());
×
898
                }
899
            }
900
            desc_elem_str = std::nullopt;
2,116✔
901
        }
902
    }
903
}
5,366✔
904

905
void
906
external_log_format::update_op_description(
3,984✔
907
    const std::vector<opid_descriptors*>& desc_defs_vec,
908
    log_op_description& lod)
909
{
910
    std::optional<std::string> desc_elem_str;
3,984✔
911
    if (!lod.lod_index) {
3,984✔
912
        for (const auto& desc_defs : desc_defs_vec) {
4,017✔
913
            if (lod.lod_index) {
39✔
914
                break;
×
915
            }
916
            for (const auto& desc_def : *desc_defs->od_descriptors) {
39✔
917
                auto desc_cap_iter
918
                    = this->lf_desc_captures.find(desc_def.od_field.pp_value);
39✔
919

920
                if (desc_cap_iter == this->lf_desc_captures.end()) {
39✔
921
                    continue;
×
922
                }
923
                desc_elem_str = desc_def.matches(desc_cap_iter->second);
39✔
924
                if (desc_elem_str) {
39✔
925
                    lod.lod_index = desc_defs->od_index;
39✔
926
                    break;
39✔
927
                }
928
            }
929
        }
930
    }
931
    if (lod.lod_index) {
3,984✔
932
        const auto& desc_def_v
933
            = *desc_defs_vec[lod.lod_index.value()]->od_descriptors;
45✔
934
        auto& desc_v = lod.lod_elements;
45✔
935

936
        if (desc_def_v.size() == desc_v.size()
45✔
937
            || (this->elf_opid_field.empty() && !desc_v.empty()))
45✔
938
        {
939
            return;
6✔
940
        }
941
        for (size_t desc_def_index = 0; desc_def_index < desc_def_v.size();
114✔
942
             desc_def_index++)
943
        {
944
            const auto& desc_def = desc_def_v[desc_def_index];
75✔
945
            auto found_desc = desc_v.value_for(desc_def_index);
75✔
946
            auto desc_cap_iter
947
                = this->lf_desc_captures.find(desc_def.od_field.pp_value);
75✔
948
            if (desc_cap_iter == this->lf_desc_captures.end()) {
75✔
949
                continue;
×
950
            }
951

952
            if (!desc_elem_str) {
75✔
953
                desc_elem_str = desc_def.matches(desc_cap_iter->second);
36✔
954
            }
955
            if (desc_elem_str) {
75✔
956
                if (!found_desc) {
75✔
957
                    desc_v.insert(desc_def_index, desc_elem_str.value());
75✔
958
                } else if (!desc_elem_str->empty()) {
×
959
                    found_desc.value()->append(desc_def.od_joiner);
×
960
                    found_desc.value()->append(desc_elem_str.value());
×
961
                }
962
            }
963
            desc_elem_str = std::nullopt;
75✔
964
        }
965
    }
966
}
3,984✔
967

968
static bool
969
next_format(
2,806,112✔
970
    const std::vector<std::shared_ptr<external_log_format::pattern>>& patterns,
971
    int& index,
972
    int& locked_index)
973
{
974
    bool retval = true;
2,806,112✔
975

976
    if (locked_index == -1) {
2,806,112✔
977
        index += 1;
2,797,276✔
978
        if (index >= (int) patterns.size()) {
2,797,276✔
979
            retval = false;
849,020✔
980
        }
981
    } else if (index == locked_index) {
8,836✔
982
        retval = false;
1✔
983
    } else {
984
        index = locked_index;
8,835✔
985
    }
986

987
    return retval;
2,806,112✔
988
}
989

990
bool
991
log_format::next_format(const pcre_format* fmt, int& index, int& locked_index)
209,354✔
992
{
993
    bool retval = true;
209,354✔
994

995
    if (locked_index == -1) {
209,354✔
996
        index += 1;
209,147✔
997
        if (fmt[index].name == nullptr) {
209,147✔
998
            retval = false;
11,382✔
999
        }
1000
    } else if (index == locked_index) {
207✔
1001
        retval = false;
40✔
1002
    } else {
1003
        index = locked_index;
167✔
1004
    }
1005

1006
    return retval;
209,354✔
1007
}
1008

1009
const char*
1010
log_format::log_scanf(scan_batch_context& sbc,
14,759✔
1011
                      uint32_t line_number,
1012
                      string_fragment line,
1013
                      const pcre_format* fmt,
1014
                      const char* time_fmt[],
1015
                      exttm* tm_out,
1016
                      timeval* tv_out,
1017

1018
                      string_fragment* ts_out,
1019
                      std::optional<string_fragment>* level_out)
1020
{
1021
    int curr_fmt = -1;
14,759✔
1022
    const char* retval = nullptr;
14,759✔
1023
    bool done = false;
14,759✔
1024
    int pat_index = sbc.sbc_pattern_locks.last_pattern_index();
14,759✔
1025

1026
    while (!done && next_format(fmt, curr_fmt, pat_index)) {
212,691✔
1027
        thread_local auto md = lnav::pcre2pp::match_data::unitialized();
197,932✔
1028

1029
        auto match_res = fmt[curr_fmt]
197,932✔
1030
                             .pcre->capture_from(line)
197,932✔
1031
                             .into(md)
197,932✔
1032
                             .matches(PCRE2_NO_UTF_CHECK)
395,864✔
1033
                             .ignore_error();
197,932✔
1034
        if (!match_res) {
197,932✔
1035
            retval = nullptr;
171,179✔
1036
        } else {
1037
            auto ts = md[fmt[curr_fmt].pf_timestamp_index];
26,753✔
1038

1039
            retval = this->lf_date_time.scan(
26,753✔
1040
                ts->data(), ts->length(), nullptr, tm_out, *tv_out);
26,753✔
1041

1042
            if (retval == nullptr) {
26,753✔
1043
                auto ls = this->lf_date_time.unlock();
23,416✔
1044
                retval = this->lf_date_time.scan(
23,416✔
1045
                    ts->data(), ts->length(), nullptr, tm_out, *tv_out);
23,416✔
1046
                if (retval != nullptr) {
23,416✔
1047
                    auto old_flags
×
1048
                        = this->lf_timestamp_flags & DATE_TIME_SET_FLAGS;
×
1049
                    auto new_flags = tm_out->et_flags & DATE_TIME_SET_FLAGS;
×
1050

1051
                    // It is unlikely a valid timestamp would lose much
1052
                    // precision.
1053
                    if (new_flags != old_flags) {
×
1054
                        retval = nullptr;
×
1055
                    }
1056
                }
1057
                if (retval == nullptr) {
23,416✔
1058
                    this->lf_date_time.relock(ls);
23,416✔
1059
                } else {
1060
                    log_debug(
×
1061
                        "%d: changed time format to '%s' due to %.*s",
1062
                        line_number,
1063
                        PTIMEC_FORMAT_STR[this->lf_date_time.dts_fmt_lock],
1064
                        ts->length(),
1065
                        ts->data());
1066
                }
1067
            }
1068

1069
            if (retval) {
26,753✔
1070
                ts->sf_end = ts->sf_begin + this->lf_date_time.dts_fmt_len;
3,337✔
1071
                *ts_out = ts.value();
3,337✔
1072
                if (md[2]) {
3,337✔
1073
                    *level_out = md[2];
329✔
1074
                } else {
1075
                    *level_out = line.substr(md[0]->sf_end);
3,008✔
1076
                }
1077
                if (curr_fmt != pat_index) {
3,337✔
1078
                    uint32_t lock_line;
1079

1080
                    if (sbc.sbc_pattern_locks.empty()) {
3,210✔
1081
                        lock_line = 0;
3,210✔
1082
                    } else {
1083
                        lock_line = line_number;
×
1084
                    }
1085

1086
                    sbc.sbc_pattern_locks.pl_lines.emplace_back(lock_line,
3,210✔
1087
                                                                curr_fmt);
1088
                    sbc.sbc_pattern_locks.pl_lines.back().pfl_timestamp_flags
3,210✔
1089
                        = tm_out->et_flags;
3,210✔
1090
                }
1091
                this->lf_timestamp_flags = tm_out->et_flags;
3,337✔
1092
                done = true;
3,337✔
1093
            }
1094
        }
1095
    }
1096

1097
    return retval;
14,759✔
1098
}
1099

1100
void
1101
log_format::annotate(logfile* lf,
39,336✔
1102
                     uint64_t line_number,
1103
                     string_attrs_t& sa,
1104
                     logline_value_vector& values) const
1105
{
1106
    if (lf != nullptr && !values.lvv_opid_value) {
39,336✔
1107
        const auto& bm = lf->get_bookmark_metadata();
4,192✔
1108
        auto bm_iter = bm.find(line_number);
4,192✔
1109
        if (bm_iter != bm.end() && !bm_iter->second.bm_opid.empty()) {
4,192✔
1110
            values.lvv_opid_value = bm_iter->second.bm_opid;
28✔
1111
            values.lvv_opid_provenance
1112
                = logline_value_vector::opid_provenance::user;
28✔
1113
        }
1114
    }
1115
}
39,336✔
1116

1117
void
1118
log_format::check_for_new_year(std::vector<logline>& dst,
2,442✔
1119
                               exttm etm,
1120
                               timeval log_tv) const
1121
{
1122
    if (dst.empty()) {
2,442✔
1123
        return;
×
1124
    }
1125

1126
    time_t diff
1127
        = dst.back().get_time<std::chrono::seconds>().count() - log_tv.tv_sec;
2,442✔
1128
    int off_year = 0, off_month = 0, off_day = 0, off_hour = 0;
2,442✔
1129
    bool do_change = true;
2,442✔
1130

1131
    if (diff <= 0) {
2,442✔
1132
        return;
2,324✔
1133
    }
1134
    if ((etm.et_flags & ETF_MONTH_SET) && diff >= (24 * 60 * 60)) {
118✔
1135
        off_year = 1;
90✔
1136
    } else if (diff >= (24 * 60 * 60)) {
28✔
1137
        off_month = 1;
3✔
1138
    } else if (!(etm.et_flags & ETF_DAY_SET) && (diff >= (60 * 60))) {
25✔
1139
        off_day = 1;
4✔
1140
    } else if (!(etm.et_flags & ETF_HOUR_SET) && (diff >= 60)) {
21✔
1141
        off_hour = 1;
4✔
1142
    } else {
1143
        do_change = false;
17✔
1144
    }
1145

1146
    if (!do_change) {
118✔
1147
        return;
17✔
1148
    }
1149
    log_debug("%zu:detected time rollover; offsets=%d %d %d %d",
101✔
1150
              dst.size(),
1151
              off_year,
1152
              off_month,
1153
              off_day,
1154
              off_hour);
1155
    for (auto& ll : dst) {
254✔
1156
        time_t ot = ll.get_time<std::chrono::seconds>().count();
153✔
1157
        tm otm;
1158

1159
        gmtime_r(&ot, &otm);
153✔
1160
        otm.tm_yday = -1;
153✔
1161
        if (otm.tm_year < off_year) {
153✔
1162
            otm.tm_year = 0;
×
1163
        } else {
1164
            otm.tm_year -= off_year;
153✔
1165
        }
1166
        otm.tm_mon -= off_month;
153✔
1167
        if (otm.tm_mon < 0) {
153✔
1168
            otm.tm_mon += 12;
1✔
1169
        }
1170
        auto new_time = tm2sec(&otm);
153✔
1171
        if (new_time == -1) {
153✔
1172
            continue;
×
1173
        }
1174
        new_time -= (off_day * 24 * 60 * 60) + (off_hour * 60 * 60);
153✔
1175
        auto old_sub = ll.get_subsecond_time<std::chrono::microseconds>();
153✔
1176
        ll.set_time(std::chrono::seconds{new_time});
153✔
1177
        ll.set_subsecond_time(old_sub);
153✔
1178
    }
1179
}
1180

1181
/*
1182
 * XXX This needs some cleanup.
1183
 */
1184
struct json_log_userdata {
1185
    json_log_userdata(shared_buffer_ref& sbr, scan_batch_context* sbc)
14,717✔
1186
        : jlu_shared_buffer(sbr), jlu_batch_context(sbc)
14,717✔
1187
    {
1188
    }
14,717✔
1189

1190
    const external_log_format::value_def* get_field_def(
386,460✔
1191
        yajlpp_parse_context* ypc)
1192
    {
1193
        const auto field_frag = ypc->get_path_as_string_fragment();
386,460✔
1194
        auto* format = this->jlu_format;
386,460✔
1195

1196
        if (this->jlu_read_order_index < format->elf_value_def_read_order.size()
386,460✔
1197
            && format->elf_value_def_read_order[this->jlu_read_order_index]
535,256✔
1198
                    .first
1199
                == field_frag)
148,796✔
1200
        {
1201
            auto retval
1202
                = format->elf_value_def_read_order[this->jlu_read_order_index]
143,408✔
1203
                      .second;
143,408✔
1204
            if (retval != nullptr) {
143,408✔
1205
                this->jlu_precision += 1;
17,547✔
1206
            }
1207
            this->jlu_read_order_index += 1;
143,408✔
1208
            return retval;
143,408✔
1209
        }
1210

1211
        format->elf_value_def_read_order.resize(this->jlu_read_order_index);
243,052✔
1212
        auto vd_iter = format->elf_value_def_frag_map.find(field_frag);
243,052✔
1213
        if (vd_iter != format->elf_value_def_frag_map.end()) {
243,052✔
1214
            format->elf_value_def_read_order.emplace_back(vd_iter->first,
195,898✔
1215
                                                          vd_iter->second);
195,898✔
1216
            this->jlu_read_order_index += 1;
195,898✔
1217
            if (vd_iter->second != nullptr) {
195,898✔
1218
                this->jlu_precision += 1;
15,230✔
1219
            }
1220
            return vd_iter->second;
195,898✔
1221
        }
1222

1223
        auto owned_frag = field_frag.to_owned(format->elf_allocator);
47,154✔
1224
        format->elf_value_def_frag_map[owned_frag] = nullptr;
47,154✔
1225
        format->elf_value_def_read_order.emplace_back(owned_frag, nullptr);
47,154✔
1226
        this->jlu_read_order_index += 1;
47,154✔
1227
        return nullptr;
47,154✔
1228
    }
1229

1230
    void add_sub_lines_for(const external_log_format::value_def* vd,
321,133✔
1231
                           bool top_level,
1232
                           std::optional<double> val,
1233
                           const unsigned char* str,
1234
                           ssize_t len,
1235
                           yajl_string_props_t* props)
1236
    {
1237
        auto res = this->jlu_format->value_line_count(
321,133✔
1238
            *this->jlu_batch_context, vd, top_level, val, str, len, props);
321,133✔
1239
        this->jlu_has_ansi |= res.vlcr_has_ansi;
321,133✔
1240
        if (!res.vlcr_valid_utf) {
321,133✔
1241
            this->jlu_valid_utf = false;
×
1242
        }
1243
        this->jlu_sub_line_count += res.vlcr_count;
321,133✔
1244
        this->jlu_quality += res.vlcr_line_format_count;
321,133✔
1245
        if (res.vlcr_line_format_index) {
321,133✔
1246
            this->jlu_format_hits[res.vlcr_line_format_index.value()] = true;
4,227✔
1247
        }
1248
    }
321,133✔
1249

1250
    external_log_format* jlu_format{nullptr};
1251
    const logline* jlu_line{nullptr};
1252
    logline* jlu_base_line{nullptr};
1253
    int jlu_sub_line_count{1};
1254
    bool jlu_has_ansi{false};
1255
    bool jlu_valid_utf{true};
1256
    yajl_handle jlu_handle{nullptr};
1257
    const char* jlu_line_value{nullptr};
1258
    size_t jlu_line_size{0};
1259
    std::stack<size_t> jlu_sub_start;
1260
    uint32_t jlu_quality{0};
1261
    uint32_t jlu_strikes{0};
1262
    uint32_t jlu_precision{0};
1263
    std::vector<bool> jlu_format_hits;
1264
    shared_buffer_ref& jlu_shared_buffer;
1265
    scan_batch_context* jlu_batch_context;
1266
    std::optional<string_fragment> jlu_opid_frag;
1267
    std::optional<string_fragment> jlu_opid_desc_frag;
1268
    std::optional<string_fragment> jlu_tid_frag;
1269
    std::optional<int64_t> jlu_tid_number;
1270
    std::optional<std::string> jlu_subid;
1271
    std::optional<log_format::scan_error> jlu_scan_error;
1272
    hasher jlu_opid_hasher;
1273
    std::optional<std::chrono::microseconds> jlu_duration;
1274
    std::optional<std::chrono::microseconds> jlu_start_time;
1275
    std::optional<std::chrono::microseconds> jlu_end_time;
1276
    exttm jlu_exttm;
1277
    size_t jlu_read_order_index{0};
1278
    subline_options jlu_subline_opts;
1279
};
1280

1281
// Forward declaration so that json_log_handlers can register this callback
// before its definition.
static int read_json_field(yajlpp_parse_context* ypc,
1282
                           const unsigned char* str,
1283
                           size_t len,
1284
                           yajl_string_props_t*);
1285

1286
static int
1287
read_json_null(yajlpp_parse_context* ypc)
4,599✔
1288
{
1289
    json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata;
4,599✔
1290
    const auto* vd = jlu->get_field_def(ypc);
4,599✔
1291

1292
    jlu->add_sub_lines_for(
9,198✔
1293
        vd, ypc->is_level(1), std::nullopt, nullptr, -1, nullptr);
4,599✔
1294

1295
    return 1;
4,599✔
1296
}
1297

1298
static int
1299
read_json_bool(yajlpp_parse_context* ypc, int val)
22,109✔
1300
{
1301
    json_log_userdata* jlu = (json_log_userdata*) ypc->ypc_userdata;
22,109✔
1302
    const auto* vd = jlu->get_field_def(ypc);
22,109✔
1303

1304
    jlu->add_sub_lines_for(
44,218✔
1305
        vd, ypc->is_level(1), std::nullopt, nullptr, -1, nullptr);
22,109✔
1306

1307
    return 1;
22,109✔
1308
}
1309

1310
static int
1311
read_json_number(yajlpp_parse_context* ypc,
37,910✔
1312
                 const char* numberVal,
1313
                 size_t numberLen)
1314
{
1315
    auto* jlu = (json_log_userdata*) ypc->ypc_userdata;
37,910✔
1316
    auto number_frag = string_fragment::from_bytes(numberVal, numberLen);
37,910✔
1317
    std::optional<double> val;
37,910✔
1318

1319
    intern_string_t field_name;
37,910✔
1320
    const auto* vd = jlu->get_field_def(ypc);
37,910✔
1321
    if (vd != nullptr) {
37,910✔
1322
        field_name = vd->vd_meta.lvm_name;
2,061✔
1323
    }
1324

1325
    if (field_name.empty()) {
37,910✔
1326
    } else if (jlu->jlu_format->lf_timestamp_field == field_name) {
2,061✔
1327
        long long divisor = jlu->jlu_format->elf_timestamp_divisor;
134✔
1328
        auto scan_res = scn::scan_value<double>(number_frag.to_string_view());
134✔
1329
        if (!scan_res) {
134✔
1330
            log_error("invalid number %.*s", (int) numberLen, numberVal);
×
1331
            return 0;
×
1332
        }
1333
        auto ts_val = scan_res.value().value();
134✔
1334
        timeval tv;
1335
        tv.tv_sec = ts_val / divisor;
134✔
1336
        tv.tv_usec = fmod(ts_val, divisor) * (1000000.0 / divisor);
134✔
1337
        jlu->jlu_end_time = to_us(tv);
134✔
1338
        jlu->jlu_format->lf_date_time.to_localtime(tv.tv_sec, jlu->jlu_exttm);
134✔
1339
        tv.tv_sec = tm2sec(&jlu->jlu_exttm.et_tm);
134✔
1340
        jlu->jlu_exttm.et_gmtoff
1341
            = jlu->jlu_format->lf_date_time.dts_local_offset_cache;
134✔
1342
        jlu->jlu_exttm.et_flags
134✔
1343
            |= ETF_MACHINE_ORIENTED | ETF_SUB_NOT_IN_FORMAT | ETF_ZONE_SET;
134✔
1344
        if (divisor == 1000) {
134✔
1345
            jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET;
17✔
1346
        } else {
1347
            jlu->jlu_exttm.et_flags |= ETF_MICROS_SET;
117✔
1348
        }
1349
        jlu->jlu_exttm.et_nsec = tv.tv_usec * 1000;
134✔
1350
        if (tv.tv_sec < 0) {
134✔
1351
            jlu->jlu_scan_error = log_format::scan_error{
×
1352
                fmt::format(FMT_STRING("invalid numeric timestamp: {} / {} "
×
1353
                                       "(timestamp-divisor) = {}"),
1354
                            number_frag,
1355
                            divisor,
1356
                            tv.tv_sec),
1357
            };
1358
            return 1;
×
1359
        }
1360
        jlu->jlu_base_line->set_time(tv);
134✔
1361
    } else if (!jlu->jlu_format->lf_start_timestamp_field.empty()
1,927✔
1362
               && jlu->jlu_format->lf_start_timestamp_field == field_name)
1,927✔
1363
    {
1364
        long long divisor = jlu->jlu_format->elf_timestamp_divisor;
16✔
1365
        auto scan_res = scn::scan_value<double>(number_frag.to_string_view());
16✔
1366
        if (!scan_res) {
16✔
1367
            log_error("invalid number %.*s", (int) numberLen, numberVal);
×
1368
            return 0;
×
1369
        }
1370
        auto ts_val = scan_res.value().value();
16✔
1371
        if (ts_val < 0) {
16✔
1372
            jlu->jlu_scan_error = log_format::scan_error{
×
1373
                fmt::format(
1374
                    FMT_STRING("invalid numeric start-timestamp: {} / {} "
×
1375
                               "(timestamp-divisor) = {}"),
1376
                    number_frag,
1377
                    divisor,
1378
                    ts_val),
1379
            };
1380
            return 1;
×
1381
        }
1382
        jlu->jlu_start_time = std::chrono::microseconds(
32✔
1383
            static_cast<int64_t>(ts_val * 1000000.0 / divisor));
16✔
1384
    } else if (jlu->jlu_format->lf_subsecond_field == field_name) {
1,911✔
1385
        auto scan_res = scn::scan_value<double>(number_frag.to_string_view());
3✔
1386
        if (!scan_res) {
3✔
1387
            log_error("invalid number %.*s", (int) numberLen, numberVal);
×
1388
            return 0;
×
1389
        }
1390
        auto ts_val = scan_res.value().value();
3✔
1391

1392
        uint64_t millis = 0;
3✔
1393
        jlu->jlu_exttm.et_flags &= ~(ETF_MICROS_SET | ETF_MILLIS_SET);
3✔
1394
        switch (jlu->jlu_format->lf_subsecond_unit.value()) {
3✔
1395
            case log_format::subsecond_unit::milli:
×
1396
                millis = ts_val;
×
1397
                jlu->jlu_exttm.et_nsec = ts_val * 1000000;
×
1398
                jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET;
×
1399
                break;
×
1400
            case log_format::subsecond_unit::micro:
×
1401
                millis = std::chrono::duration_cast<std::chrono::milliseconds>(
×
1402
                             std::chrono::microseconds((int64_t) ts_val))
×
1403
                             .count();
×
1404
                jlu->jlu_exttm.et_nsec = ts_val * 1000;
×
1405
                jlu->jlu_exttm.et_flags |= ETF_MICROS_SET;
×
1406
                break;
×
1407
            case log_format::subsecond_unit::nano:
3✔
1408
                millis = std::chrono::duration_cast<std::chrono::milliseconds>(
3✔
1409
                             std::chrono::nanoseconds((int64_t) ts_val))
3✔
1410
                             .count();
3✔
1411
                jlu->jlu_exttm.et_nsec = ts_val;
3✔
1412
                jlu->jlu_exttm.et_flags |= ETF_NANOS_SET;
3✔
1413
                break;
3✔
1414
        }
1415
        jlu->jlu_exttm.et_flags |= ETF_SUB_NOT_IN_FORMAT;
3✔
1416
        jlu->jlu_base_line->set_subsecond_time(
3✔
1417
            std::chrono::milliseconds(millis));
1418
    } else if (jlu->jlu_format->elf_level_field == field_name) {
1,908✔
1419
        if (jlu->jlu_format->elf_level_pairs.empty()) {
516✔
1420
            auto new_level = jlu->jlu_format->convert_level(
321✔
1421
                number_frag, jlu->jlu_batch_context);
1422
            if (new_level != LEVEL_UNKNOWN) {
321✔
1423
                jlu->jlu_base_line->set_level(new_level);
77✔
1424
            }
1425
        } else {
1426
            auto scan_res
1427
                = scn::scan_int<int64_t>(number_frag.to_string_view());
195✔
1428
            if (!scan_res) {
195✔
1429
                log_error("invalid number %.*s", (int) numberLen, numberVal);
×
1430
                return 0;
×
1431
            }
1432
            auto level_int = scan_res.value().value();
195✔
1433

1434
            for (const auto& pair : jlu->jlu_format->elf_level_pairs) {
705✔
1435
                if (pair.first == level_int) {
632✔
1436
                    jlu->jlu_base_line->set_level(pair.second);
122✔
1437
                    break;
122✔
1438
                }
1439
            }
1440
        }
1441
    } else if (vd != nullptr) {
1,392✔
1442
        if (jlu->jlu_format->elf_thread_id_field == field_name) {
1,392✔
1443
            auto& sbc = *jlu->jlu_batch_context;
124✔
1444
            auto tid_iter = sbc.sbc_tids.ltis_tid_ranges.find(number_frag);
124✔
1445
            if (tid_iter == sbc.sbc_tids.ltis_tid_ranges.end()) {
124✔
1446
                jlu->jlu_tid_frag = number_frag.to_owned(sbc.sbc_allocator);
71✔
1447
            } else {
1448
                jlu->jlu_tid_frag = tid_iter->first;
53✔
1449
            }
1450
        }
1451
        if ((vd->vd_meta.lvm_kind == value_kind_t::VALUE_INTEGER
1,392✔
1452
             || vd->vd_meta.lvm_kind == value_kind_t::VALUE_FLOAT)
168✔
1453
            && !vd->vd_meta.lvm_foreign_key && !vd->vd_meta.lvm_identifier)
1,250✔
1454
        {
1455
            auto scan_res
1456
                = scn::scan_value<double>(number_frag.to_string_view());
247✔
1457
            if (!scan_res) {
247✔
1458
                log_error("invalid number %.*s", (int) numberLen, numberVal);
×
1459
                return 0;
×
1460
            }
1461
            val = scan_res.value().value();
247✔
1462
            if (jlu->jlu_format->elf_duration_field == field_name) {
247✔
1463
                auto dur_secs
1464
                    = val.value() / jlu->jlu_format->elf_duration_divisor;
26✔
1465
                auto us = std::chrono::microseconds(
1466
                    static_cast<int64_t>(dur_secs * 1000000));
26✔
1467
                jlu->jlu_duration = std::max(us, 1us);
26✔
1468
            }
1469
        }
1470
    }
1471

1472
    jlu->add_sub_lines_for(vd,
37,910✔
1473
                           ypc->is_level(1),
37,910✔
1474
                           val,
1475
                           (const unsigned char*) numberVal,
1476
                           numberLen,
1477
                           nullptr);
1478

1479
    return 1;
37,910✔
1480
}
1481

1482
static int
1483
json_array_start(void* ctx)
68,701✔
1484
{
1485
    auto* ypc = (yajlpp_parse_context*) ctx;
68,701✔
1486
    auto* jlu = (json_log_userdata*) ypc->ypc_userdata;
68,701✔
1487

1488
    jlu->jlu_sub_start.push(yajl_get_bytes_consumed(jlu->jlu_handle) - 1);
68,701✔
1489
    if (ypc->ypc_path_index_stack.size() == 2) {
68,701✔
1490
        const auto* vd = jlu->get_field_def(ypc);
21,730✔
1491

1492
        jlu->add_sub_lines_for(vd, true, std::nullopt, nullptr, -1, nullptr);
21,730✔
1493
    }
1494

1495
    return 1;
68,701✔
1496
}
1497

1498
static int
1499
json_array_start_const(void* ctx)
5,131✔
1500
{
1501
    auto* ypc = (yajlpp_parse_context*) ctx;
5,131✔
1502
    auto* jlu = (json_log_userdata*) ypc->ypc_userdata;
5,131✔
1503

1504
    jlu->jlu_sub_start.push(yajl_get_bytes_consumed(jlu->jlu_handle) - 1);
5,131✔
1505

1506
    return 1;
5,131✔
1507
}
1508

1509
static int
1510
json_array_end(void* ctx)
5,119✔
1511
{
1512
    auto* ypc = (yajlpp_parse_context*) ctx;
5,119✔
1513
    auto* jlu = (json_log_userdata*) ypc->ypc_userdata;
5,119✔
1514
    const auto* vd = ypc->ypc_path_index_stack.size() > 1
5,119✔
1515
        ? jlu->get_field_def(ypc)
5,119✔
1516
        : nullptr;
5,119✔
1517

1518
    if (ypc->ypc_path_index_stack.size() == 1 || vd != nullptr) {
5,119✔
1519
        intern_string_t field_name;
1,394✔
1520
        if (vd != nullptr) {
1,394✔
1521
            field_name = vd->vd_meta.lvm_name;
×
1522
        } else {
1523
            field_name = ypc->get_path();
1,394✔
1524
        }
1525
        auto sub_start = jlu->jlu_sub_start.top();
1,394✔
1526
        size_t sub_end = yajl_get_bytes_consumed(jlu->jlu_handle);
1,394✔
1527
        auto json_frag = string_fragment::from_byte_range(
1,394✔
1528
            jlu->jlu_shared_buffer.get_data(), sub_start, sub_end);
1,394✔
1529
        jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
1,394✔
1530
            jlu->jlu_format->get_value_meta(field_name,
2,788✔
1531
                                            value_kind_t::VALUE_JSON),
1532
            json_frag);
1533
        if (field_name == jlu->jlu_format->elf_opid_field) {
1,394✔
1534
            jlu->jlu_opid_desc_frag = json_frag;
15✔
1535
        }
1536
    }
1537
    jlu->jlu_sub_start.pop();
5,119✔
1538

1539
    return 1;
5,119✔
1540
}
1541

1542
static int
1543
read_array_end(void* ctx)
68,485✔
1544
{
1545
    auto* ypc = (yajlpp_parse_context*) ctx;
68,485✔
1546
    auto* jlu = (json_log_userdata*) ypc->ypc_userdata;
68,485✔
1547
    const auto* vd = ypc->ypc_path_index_stack.size() > 1
68,485✔
1548
        ? jlu->get_field_def(ypc)
68,485✔
1549
        : nullptr;
68,485✔
1550

1551
    if (ypc->ypc_path_index_stack.size() == 1 || vd != nullptr) {
68,485✔
1552
        const intern_string_t field_name = ypc->get_path_fragment_i(0);
21,699✔
1553
        auto sub_start = jlu->jlu_sub_start.top();
21,699✔
1554
        jlu->jlu_sub_start.pop();
21,699✔
1555
        size_t sub_end = yajl_get_bytes_consumed(jlu->jlu_handle);
21,699✔
1556
        auto json_frag = string_fragment::from_byte_range(
21,699✔
1557
            jlu->jlu_shared_buffer.get_data(), sub_start, sub_end);
21,699✔
1558
        if (field_name == jlu->jlu_format->elf_opid_field) {
21,699✔
1559
            jlu->jlu_opid_desc_frag = json_frag;
2,796✔
1560
        }
1561
        if (ypc->ypc_path_index_stack.size() > 1 && vd != nullptr) {
21,699✔
1562
            jlu->add_sub_lines_for(
×
1563
                vd, false, std::nullopt, nullptr, -1, nullptr);
1564
        }
1565
    }
1566

1567
    return 1;
68,485✔
1568
}
1569

1570
// Handler table with a single pattern handler: properties whose name
// matches "\w+" are passed to the read_json_* callbacks registered below.
static const json_path_container json_log_handlers = {
1571
    yajlpp::pattern_property_handler("\\w+")
1572
        .add_cb(read_json_null)
1573
        .add_cb(read_json_bool)
1574
        .add_cb(read_json_number)
1575
        .add_cb(read_json_field),
1576
};
1577

1578
// Forward declaration; the definition is not visible in this part of the
// file and is presumably provided further down.
static int rewrite_json_field(yajlpp_parse_context* ypc,
1579
                              const unsigned char* str,
1580
                              size_t len,
1581
                              yajl_string_props_t*);
1582

1583
static int
1584
rewrite_json_null(yajlpp_parse_context* ypc)
526✔
1585
{
1586
    auto jlu = (json_log_userdata*) ypc->ypc_userdata;
526✔
1587
    const auto* vd = jlu->get_field_def(ypc);
526✔
1588

1589
    if (!ypc->is_level(1) && vd == nullptr) {
526✔
1590
        return 1;
496✔
1591
    }
1592
    jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
30✔
1593
        jlu->jlu_format->get_value_meta(ypc, vd, value_kind_t::VALUE_NULL));
60✔
1594

1595
    return 1;
30✔
1596
}
1597

1598
static int
1599
rewrite_json_bool(yajlpp_parse_context* ypc, int val)
2,582✔
1600
{
1601
    auto* jlu = (json_log_userdata*) ypc->ypc_userdata;
2,582✔
1602
    const auto* vd = jlu->get_field_def(ypc);
2,582✔
1603

1604
    if (!ypc->is_level(1) && vd == nullptr) {
2,582✔
1605
        return 1;
2,251✔
1606
    }
1607
    jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
331✔
1608
        jlu->jlu_format->get_value_meta(ypc, vd, value_kind_t::VALUE_BOOLEAN),
331✔
1609
        (bool) val);
331✔
1610
    return 1;
331✔
1611
}
1612

1613
static int
1614
rewrite_json_int(yajlpp_parse_context* ypc, long long val)
3,742✔
1615
{
1616
    auto* jlu = (json_log_userdata*) ypc->ypc_userdata;
3,742✔
1617
    const auto* vd = jlu->get_field_def(ypc);
3,742✔
1618

1619
    if (vd != nullptr) {
3,742✔
1620
        const intern_string_t field_name = vd->vd_meta.lvm_name;
497✔
1621
        if (jlu->jlu_format->lf_timestamp_field == field_name) {
497✔
1622
            long long divisor = jlu->jlu_format->elf_timestamp_divisor;
25✔
1623
            timeval tv;
1624

1625
            tv.tv_sec = val / divisor;
25✔
1626
            tv.tv_usec = fmod(val, divisor) * (1000000.0 / divisor);
25✔
1627
            jlu->jlu_end_time = to_us(tv);
25✔
1628
            jlu->jlu_format->lf_date_time.to_localtime(tv.tv_sec,
25✔
1629
                                                       jlu->jlu_exttm);
25✔
1630
            jlu->jlu_exttm.et_gmtoff
1631
                = jlu->jlu_format->lf_date_time.dts_local_offset_cache;
25✔
1632
            jlu->jlu_exttm.et_flags |= ETF_MACHINE_ORIENTED
25✔
1633
                | ETF_SUB_NOT_IN_FORMAT | ETF_ZONE_SET | ETF_Z_FOR_UTC;
1634
            if (divisor == 1) {
25✔
1635
                jlu->jlu_exttm.et_flags |= ETF_MICROS_SET;
4✔
1636
            } else {
1637
                jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET;
21✔
1638
            }
1639
            jlu->jlu_exttm.et_nsec = tv.tv_usec * 1000;
25✔
1640
        } else if (!jlu->jlu_format->lf_start_timestamp_field.empty()
472✔
1641
                   && jlu->jlu_format->lf_start_timestamp_field == field_name)
472✔
1642
        {
1643
            long long divisor = jlu->jlu_format->elf_timestamp_divisor;
3✔
1644
            jlu->jlu_start_time = std::chrono::microseconds(
6✔
1645
                static_cast<int64_t>(val * 1000000.0 / divisor));
3✔
1646
        } else if (jlu->jlu_format->lf_subsecond_field == field_name) {
469✔
1647
            jlu->jlu_exttm.et_flags &= ~(ETF_MICROS_SET | ETF_MILLIS_SET);
4✔
1648
            switch (jlu->jlu_format->lf_subsecond_unit.value()) {
4✔
1649
                case log_format::subsecond_unit::milli:
×
1650
                    jlu->jlu_exttm.et_nsec = val * 1000000;
×
1651
                    jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET;
×
1652
                    break;
×
1653
                case log_format::subsecond_unit::micro:
×
1654
                    jlu->jlu_exttm.et_nsec = val * 1000;
×
1655
                    jlu->jlu_exttm.et_flags |= ETF_MICROS_SET;
×
1656
                    break;
×
1657
                case log_format::subsecond_unit::nano:
4✔
1658
                    jlu->jlu_exttm.et_nsec = val;
4✔
1659
                    jlu->jlu_exttm.et_flags |= ETF_NANOS_SET;
4✔
1660
                    break;
4✔
1661
            }
1662
            jlu->jlu_exttm.et_flags |= ETF_SUB_NOT_IN_FORMAT;
4✔
1663
        } else if (jlu->jlu_format->elf_duration_field == field_name) {
465✔
1664
            auto dur_secs = val / jlu->jlu_format->elf_duration_divisor;
2✔
1665
            jlu->jlu_duration
1666
                = std::max(1us,
2✔
1667
                           std::chrono::microseconds(
×
1668
                               static_cast<int64_t>(dur_secs * 1000000.0)));
4✔
1669
        }
1670
    }
1671

1672
    if (!ypc->is_level(1) && vd == nullptr) {
3,742✔
1673
        return 1;
3,230✔
1674
    }
1675
    if (vd != nullptr
512✔
1676
        && vd->vd_meta.lvm_name == jlu->jlu_format->elf_thread_id_field)
512✔
1677
    {
1678
        jlu->jlu_tid_number = val;
71✔
1679
    }
1680
    jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
512✔
1681
        jlu->jlu_format->get_value_meta(ypc, vd, value_kind_t::VALUE_INTEGER),
512✔
1682
        (int64_t) val);
512✔
1683
    return 1;
512✔
1684
}
1685

1686
static int
1687
rewrite_json_double(yajlpp_parse_context* ypc, double val)
96✔
1688
{
1689
    auto* jlu = (json_log_userdata*) ypc->ypc_userdata;
96✔
1690
    const auto* vd = jlu->get_field_def(ypc);
96✔
1691

1692
    if (vd != nullptr) {
96✔
1693
        const intern_string_t field_name = vd->vd_meta.lvm_name;
92✔
1694
        if (jlu->jlu_format->lf_timestamp_field == field_name) {
92✔
1695
            long long divisor = jlu->jlu_format->elf_timestamp_divisor;
69✔
1696
            timeval tv;
1697

1698
            tv.tv_sec = val / divisor;
69✔
1699
            tv.tv_usec = fmod(val, divisor) * (1000000.0 / divisor);
69✔
1700
            jlu->jlu_end_time = to_us(tv);
69✔
1701
            jlu->jlu_format->lf_date_time.to_localtime(tv.tv_sec,
69✔
1702
                                                       jlu->jlu_exttm);
69✔
1703
            jlu->jlu_exttm.et_gmtoff
1704
                = jlu->jlu_format->lf_date_time.dts_local_offset_cache;
69✔
1705
            jlu->jlu_exttm.et_flags |= ETF_MACHINE_ORIENTED
69✔
1706
                | ETF_SUB_NOT_IN_FORMAT | ETF_ZONE_SET | ETF_Z_FOR_UTC;
1707
            if (divisor == 1) {
69✔
1708
                jlu->jlu_exttm.et_flags |= ETF_MICROS_SET;
61✔
1709
            } else {
1710
                jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET;
8✔
1711
            }
1712
            jlu->jlu_exttm.et_nsec = tv.tv_usec * 1000;
69✔
1713
        } else if (!jlu->jlu_format->lf_start_timestamp_field.empty()
23✔
1714
                   && jlu->jlu_format->lf_start_timestamp_field == field_name)
23✔
1715
        {
1716
            long long divisor = jlu->jlu_format->elf_timestamp_divisor;
×
1717
            jlu->jlu_start_time = std::chrono::microseconds(
×
1718
                static_cast<int64_t>(val * 1000000.0 / divisor));
1719
        } else if (jlu->jlu_format->lf_subsecond_field == field_name) {
23✔
1720
            jlu->jlu_exttm.et_flags &= ~(ETF_MICROS_SET | ETF_MILLIS_SET);
×
1721
            switch (jlu->jlu_format->lf_subsecond_unit.value()) {
×
1722
                case log_format::subsecond_unit::milli:
×
1723
                    jlu->jlu_exttm.et_nsec = val * 1000000;
×
1724
                    jlu->jlu_exttm.et_flags |= ETF_MILLIS_SET;
×
1725
                    break;
×
1726
                case log_format::subsecond_unit::micro:
×
1727
                    jlu->jlu_exttm.et_nsec = val * 1000;
×
1728
                    jlu->jlu_exttm.et_flags |= ETF_MICROS_SET;
×
1729
                    break;
×
1730
                case log_format::subsecond_unit::nano:
×
1731
                    jlu->jlu_exttm.et_nsec = val;
×
1732
                    jlu->jlu_exttm.et_flags |= ETF_NANOS_SET;
×
1733
                    break;
×
1734
            }
1735
            jlu->jlu_exttm.et_flags |= ETF_SUB_NOT_IN_FORMAT;
×
1736
        } else if (jlu->jlu_format->elf_duration_field == field_name) {
23✔
1737
            auto dur_secs = val / jlu->jlu_format->elf_duration_divisor;
23✔
1738
            jlu->jlu_duration
1739
                = std::max(1us,
23✔
1740
                           std::chrono::microseconds(
×
1741
                               static_cast<int64_t>(dur_secs * 1000000.0)));
46✔
1742
        }
1743
    }
1744

1745
    if (!ypc->is_level(1) && vd == nullptr) {
96✔
1746
        return 1;
×
1747
    }
1748
    jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
96✔
1749
        jlu->jlu_format->get_value_meta(ypc, vd, value_kind_t::VALUE_FLOAT),
192✔
1750
        val);
1751

1752
    return 1;
96✔
1753
}
1754

1755
// Callback table built from the rewrite_json_* callbacks.  As with
// json_log_handlers, one pattern handler matches any property whose name is
// made of word characters; unlike that table, integers and doubles have
// separate callbacks here.
// NOTE(review): the code that installs this table is outside this section.
static const json_path_container json_log_rewrite_handlers{
    yajlpp::pattern_property_handler("\\w+")
        .add_cb(rewrite_json_null)
        .add_cb(rewrite_json_bool)
        .add_cb(rewrite_json_int)
        .add_cb(rewrite_json_double)
        .add_cb(rewrite_json_field),
};
1763

1764
bool
1765
external_log_format::scan_for_partial(const log_format_file_state& lffs,
1✔
1766
                                      shared_buffer_ref& sbr,
1767
                                      size_t& len_out) const
1768
{
1769
    if (this->elf_type != elf_type_t::ELF_TYPE_TEXT) {
1✔
1770
        return false;
×
1771
    }
1772

1773
    const auto& pat
1774
        = this->elf_pattern_order[lffs.lffs_pattern_locks.last_pattern_index()];
1✔
1775
    if (!this->lf_multiline) {
1✔
1776
        len_out = pat->p_pcre.pp_value->match_partial(sbr.to_string_fragment());
1✔
1777
        return true;
1✔
1778
    }
1779

1780
    if (pat->p_timestamp_end == -1 || pat->p_timestamp_end > (int) sbr.length())
×
1781
    {
1782
        len_out = 0;
×
1783
        return false;
×
1784
    }
1785

1786
    len_out = pat->p_pcre.pp_value->match_partial(sbr.to_string_fragment());
×
1787
    return (int) len_out > pat->p_timestamp_end;
×
1788
}
1789

1790
std::vector<lnav::console::snippet>
1791
external_log_format::get_snippets() const
23✔
1792
{
1793
    std::vector<lnav::console::snippet> retval;
23✔
1794

1795
    for (const auto& src_pair : this->elf_format_sources) {
46✔
1796
        retval.emplace_back(lnav::console::snippet::from(src_pair.first, "")
46✔
1797
                                .with_line(src_pair.second));
23✔
1798
    }
1799

1800
    return retval;
23✔
1801
}
×
1802

1803
log_format::scan_result_t
1804
external_log_format::scan_json(std::vector<logline>& dst,
113,951✔
1805
                               const line_info& li,
1806
                               shared_buffer_ref& sbr,
1807
                               scan_batch_context& sbc)
1808
{
1809
    auto line_frag = sbr.to_string_fragment();
113,951✔
1810
    auto& ll = dst.back();
113,951✔
1811

1812
    if (!line_frag.startswith("{")) {
113,951✔
1813
        if (!this->lf_specialized) {
100,217✔
1814
            return scan_no_match{"line is not a JSON object"};
100,212✔
1815
        }
1816

1817
        ll.set_level(LEVEL_INVALID);
5✔
1818
        return scan_match{0};
5✔
1819
    }
1820

1821
    auto& ypc = *(this->jlf_parse_context);
13,734✔
1822
    yajl_handle handle = this->jlf_yajl_handle.get();
13,734✔
1823
    json_log_userdata jlu(sbr, &sbc);
13,734✔
1824

1825
    if (li.li_partial) {
13,734✔
1826
        log_debug("skipping partial line at offset %lld",
×
1827
                  li.li_file_range.fr_offset);
1828
        if (this->lf_specialized) {
×
1829
            ll.set_level(LEVEL_INVALID);
×
1830
        }
1831
        return scan_incomplete{};
×
1832
    }
1833

1834
    const auto* line_data = (const unsigned char*) sbr.get_data();
13,734✔
1835

1836
    this->lf_desc_captures.clear();
13,734✔
1837
    this->lf_desc_allocator.reset();
13,734✔
1838

1839
    ll.set_time(0us);
13,734✔
1840
    yajl_reset(handle);
13,734✔
1841
    ypc.set_static_handler(json_log_handlers.jpc_children[0]);
13,734✔
1842
    ypc.ypc_userdata = &jlu;
13,734✔
1843
    ypc.ypc_ignore_unused = true;
13,734✔
1844
    ypc.ypc_alt_callbacks.yajl_start_array = json_array_start;
13,734✔
1845
    ypc.ypc_alt_callbacks.yajl_start_map = json_array_start;
13,734✔
1846
    ypc.ypc_alt_callbacks.yajl_end_array = read_array_end;
13,734✔
1847
    ypc.ypc_alt_callbacks.yajl_end_map = read_array_end;
13,734✔
1848
    jlu.jlu_format = this;
13,734✔
1849
    jlu.jlu_base_line = &ll;
13,734✔
1850
    jlu.jlu_line_value = sbr.get_data();
13,734✔
1851
    jlu.jlu_line_size = sbr.length();
13,734✔
1852
    jlu.jlu_handle = handle;
13,734✔
1853
    jlu.jlu_format_hits.resize(this->jlf_line_format.size());
13,734✔
1854
    if (yajl_parse(handle, line_data, sbr.length()) == yajl_status_ok
13,734✔
1855
        && yajl_complete_parse(handle) == yajl_status_ok)
13,734✔
1856
    {
1857
        if (jlu.jlu_scan_error) {
13,549✔
1858
            if (this->lf_specialized) {
19✔
1859
                ll.set_level(LEVEL_INVALID);
10✔
1860
                return scan_match{};
10✔
1861
            }
1862
            return jlu.jlu_scan_error.value();
9✔
1863
        }
1864
        if (ll.get_time() == 0us) {
13,530✔
1865
            ll.set_ignore(true);
8,709✔
1866
            if (this->lf_specialized) {
8,709✔
1867
                return scan_match{};
×
1868
            }
1869

1870
            return scan_no_match{
8,709✔
1871
                "JSON message does not have expected timestamp property"};
8,709✔
1872
        }
1873

1874
        if (jlu.jlu_tid_frag) {
4,821✔
1875
            this->jlf_line_values.lvv_thread_id_value
1876
                = jlu.jlu_tid_frag->to_owned(
372✔
1877
                    this->jlf_line_values.lvv_allocator);
248✔
1878
            auto tid_iter = sbc.sbc_tids.insert_tid(
248✔
1879
                sbc.sbc_allocator, jlu.jlu_tid_frag.value(), ll.get_time<>());
124✔
1880
            tid_iter->second.titr_level_stats.update_msg_count(
124✔
1881
                ll.get_msg_level());
1882
            ll.merge_bloom_bits(jlu.jlu_tid_frag->bloom_bits());
124✔
1883
        } else {
1884
            auto tid_iter = sbc.sbc_tids.insert_tid(
4,697✔
1885
                sbc.sbc_allocator, string_fragment{}, ll.get_time<>());
4,697✔
1886
            tid_iter->second.titr_level_stats.update_msg_count(
4,697✔
1887
                ll.get_msg_level());
1888
        }
1889

1890
        if (jlu.jlu_start_time && jlu.jlu_end_time && !jlu.jlu_duration) {
4,821✔
1891
            if (jlu.jlu_end_time.value() > jlu.jlu_start_time.value()) {
16✔
1892
                jlu.jlu_duration
1893
                    = jlu.jlu_end_time.value() - jlu.jlu_start_time.value();
16✔
1894
            }
1895
        }
1896

1897
        auto found_opid_desc = false;
4,821✔
1898
        if (this->elf_opid_field.empty()
4,821✔
1899
            && this->lf_opid_source.value_or(opid_source_t::from_description)
826✔
1900
                == opid_source_t::from_description
1901
            && this->lf_opid_description_def->size() == 1)
5,647✔
1902
        {
1903
            const auto& od = this->lf_opid_description_def->begin()->second;
343✔
1904
            for (const auto& desc : *od.od_descriptors) {
1,029✔
1905
                auto desc_iter
1906
                    = this->lf_desc_captures.find(desc.od_field.pp_value);
686✔
1907
                if (desc_iter == this->lf_desc_captures.end()) {
686✔
1908
                    continue;
602✔
1909
                }
1910
                jlu.jlu_opid_hasher.update(desc_iter->second);
84✔
1911
                found_opid_desc = true;
84✔
1912
            }
1913

1914
        } else if (!jlu.jlu_opid_desc_frag && !jlu.jlu_opid_frag
6,160✔
1915
                   && jlu.jlu_duration)
6,160✔
1916
        {
1917
            jlu.jlu_opid_hasher.update(sbr.to_string_fragment());
3✔
1918
        }
1919

1920
        if (jlu.jlu_opid_desc_frag || jlu.jlu_duration
6,846✔
1921
            || (found_opid_desc && this->lf_opid_description_def->size() == 1))
6,846✔
1922
        {
1923
            char buf[hasher::STRING_SIZE];
1924
            jlu.jlu_opid_hasher.to_string(buf);
2,841✔
1925
            auto opid_frag = string_fragment::from_bytes(buf, sizeof(buf) - 1);
2,841✔
1926
            auto opid_iter = sbc.sbc_opids.los_opid_ranges.find(opid_frag);
2,841✔
1927
            if (opid_iter == sbc.sbc_opids.los_opid_ranges.end()) {
2,841✔
1928
                jlu.jlu_opid_frag = opid_frag.to_owned(sbc.sbc_allocator);
2,829✔
1929
            } else {
1930
                jlu.jlu_opid_frag = opid_iter->first;
12✔
1931
            }
1932
        }
1933

1934
        if (jlu.jlu_opid_frag) {
4,821✔
1935
            ll.merge_bloom_bits(jlu.jlu_opid_frag->bloom_bits());
3,984✔
1936
            this->jlf_line_values.lvv_opid_value
1937
                = jlu.jlu_opid_frag->to_string();
3,984✔
1938
            this->jlf_line_values.lvv_opid_provenance
1939
                = logline_value_vector::opid_provenance::file;
3,984✔
1940
            auto opid_iter = this->record_opid(jlu.jlu_opid_frag.value(),
7,968✔
1941
                                               jlu.jlu_duration.value_or(1us),
3,984✔
1942
                                               ll.get_time<>(),
1943
                                               ll.get_msg_level(),
1944
                                               sbc);
1945
            auto& elems = opid_iter->second.otr_description.lod_elements;
3,984✔
1946
            if (jlu.jlu_opid_desc_frag && elems.empty()) {
3,984✔
1947
                elems.insert(0,
×
1948
                             fmt::format(FMT_STRING(" {}"),
11,160✔
1949
                                         jlu.jlu_opid_desc_frag.value()));
1950
            }
1951

1952
            if (jlu.jlu_subid) {
3,984✔
1953
                auto subid_frag
1954
                    = string_fragment::from_str(jlu.jlu_subid.value());
×
1955

1956
                auto* ostr = sbc.sbc_opids.sub_op_in_use(sbc.sbc_allocator,
×
1957
                                                         opid_iter,
1958
                                                         subid_frag,
1959
                                                         ll.get_time<>(),
×
1960
                                                         ll.get_msg_level());
1961
                if (ostr != nullptr && ostr->ostr_description.empty()) {
×
1962
                    log_op_description sub_desc;
×
1963
                    this->update_op_description(
×
1964
                        *this->lf_subid_description_def_vec, sub_desc);
×
1965
                    if (!sub_desc.lod_elements.empty()) {
×
1966
                        auto& sub_desc_def
1967
                            = this->lf_subid_description_def_vec->at(
×
1968
                                sub_desc.lod_index.value());
×
1969
                        ostr->ostr_description
1970
                            = sub_desc_def->to_string(sub_desc.lod_elements);
×
1971
                    }
1972
                }
1973
            }
1974

1975
            auto& otr = opid_iter->second;
3,984✔
1976
            this->update_op_description(*this->lf_opid_description_def_vec,
3,984✔
1977
                                        otr.otr_description);
3,984✔
1978
        } else {
1979
            this->jlf_line_values.lvv_opid_value = std::nullopt;
837✔
1980
        }
1981

1982
        jlu.jlu_sub_line_count += this->jlf_line_format_init_count;
4,821✔
1983
        ll.set_has_ansi(jlu.jlu_has_ansi);
4,821✔
1984
        ll.set_valid_utf(jlu.jlu_valid_utf);
4,821✔
1985
        auto sub_ll = ll.clone();
4,821✔
1986
        sub_ll.set_continued(true);
4,821✔
1987
        for (int lpc = 1; lpc < jlu.jlu_sub_line_count; lpc++) {
25,217✔
1988
            sub_ll.set_sub_offset(lpc);
20,396✔
1989
            dst.emplace_back(std::move(sub_ll));
20,396✔
1990
        }
1991
        this->lf_timestamp_flags = jlu.jlu_exttm.et_flags;
4,821✔
1992

1993
        if (!this->lf_specialized) {
4,821✔
1994
            static const intern_string_t ts_field
1995
                = intern_string::lookup("__timestamp__", -1);
4,200✔
1996
            static const intern_string_t level_field
1997
                = intern_string::lookup("__level__");
6,056✔
1998
            static const intern_string_t duration_field
1999
                = intern_string::lookup("__duration__");
6,056✔
2000
            for (const auto& [index, jfe] :
39,068✔
2001
                 lnav::itertools::enumerate(this->jlf_line_format))
43,268✔
2002
            {
2003
                if (jfe.jfe_type != json_log_field::VARIABLE
93,006✔
2004
                    || jfe.jfe_value.pp_value == ts_field
22,495✔
2005
                    || jfe.jfe_value.pp_value == level_field
18,376✔
2006
                    || jfe.jfe_value.pp_value == duration_field
17,315✔
2007
                    || jfe.jfe_default_value != "-")
57,363✔
2008
                {
2009
                    continue;
23,270✔
2010
                }
2011
                if (!jlu.jlu_format_hits[index]) {
11,598✔
2012
                    jlu.jlu_strikes += 1;
11,045✔
2013
                }
2014
            }
2015
        }
2016
    } else {
2017
        unsigned char* msg;
2018
        int line_count = 2;
185✔
2019

2020
        msg = yajl_get_error(
370✔
2021
            handle, 1, (const unsigned char*) sbr.get_data(), sbr.length());
185✔
2022
        if (msg != nullptr) {
185✔
2023
            auto msg_frag = string_fragment::from_c_str(msg);
185✔
2024
            log_debug("Unable to parse line at offset %lld: %.*s",
185✔
2025
                      li.li_file_range.fr_offset,
2026
                      msg_frag.length(),
2027
                      msg_frag.data());
2028
            line_count = msg_frag.count('\n') + 1;
185✔
2029
            yajl_free_error(handle, msg);
185✔
2030
        }
2031
        if (!this->lf_specialized || dst.empty()) {
185✔
2032
            return scan_no_match{"JSON parsing failed"};
182✔
2033
        }
2034
        ll.set_has_ansi(jlu.jlu_has_ansi);
3✔
2035
        ll.set_valid_utf(jlu.jlu_valid_utf);
3✔
2036
        ll.set_level(LEVEL_INVALID);
3✔
2037
        auto sub_ll = ll.clone();
3✔
2038
        sub_ll.set_continued(true);
3✔
2039
        for (int lpc = 1; lpc < line_count; lpc++) {
12✔
2040
            sub_ll.set_sub_offset(lpc);
9✔
2041
            dst.emplace_back(std::move(sub_ll));
9✔
2042
        }
2043
    }
2044

2045
    if (jlu.jlu_quality > 0) {
4,824✔
2046
        jlu.jlu_quality += 3000;
951✔
2047
    }
2048
    return scan_match{jlu.jlu_quality, jlu.jlu_strikes, jlu.jlu_precision};
4,824✔
2049
}
13,734✔
2050

2051
static void
2052
ingest_numeric_value(const external_log_format::value_def& vd,
5,281✔
2053
                     string_fragment field_sf,
2054
                     std::optional<string_fragment> unit_sf,
2055
                     scan_batch_context& sbc)
2056
{
2057
    if (vd.vd_meta.lvm_identifier || vd.vd_meta.lvm_foreign_key) {
5,281✔
2058
        return;
163✔
2059
    }
2060
    if (!field_sf.is_valid()) {
5,204✔
2061
        return;
×
2062
    }
2063

2064
    std::optional<double> dvalue_opt;
5,204✔
2065
    switch (vd.vd_meta.lvm_kind) {
5,204✔
2066
        case value_kind_t::VALUE_INTEGER: {
4,891✔
2067
            int64_t ivalue;
2068
            auto from_res = fast_float::from_chars(
4,891✔
2069
                field_sf.begin(), field_sf.end(), ivalue);
2070
            if (from_res.ec == std::errc()) {
4,891✔
2071
                dvalue_opt = ivalue;
4,891✔
2072
            }
2073
            break;
4,891✔
2074
        }
2075
        case value_kind_t::VALUE_FLOAT: {
227✔
2076
            auto scan_res = scn::scan_value<double>(field_sf.to_string_view());
227✔
2077
            if (scan_res) {
227✔
2078
                dvalue_opt = scan_res->value();
227✔
2079
            }
2080
            break;
227✔
2081
        }
2082
        default:
86✔
2083
            break;
86✔
2084
    }
2085
    if (!dvalue_opt) {
5,204✔
2086
        return;
86✔
2087
    }
2088

2089
    auto dvalue = dvalue_opt.value();
5,118✔
2090
    if (unit_sf && unit_sf->is_valid()) {
5,118✔
2091
        auto unit_iter
2092
            = vd.vd_unit_scaling.find(intern_string::lookup(unit_sf.value()));
×
2093
        if (unit_iter != vd.vd_unit_scaling.end()) {
×
2094
            unit_iter->second.scale(dvalue);
×
2095
        }
2096
    }
2097
    sbc.sbc_value_stats[vd.vd_meta.lvm_values_index.value()].add_value(dvalue);
5,118✔
2098
}
2099

2100
/**
 * Register a message against an operation ID in the batch's opid map and
 * bump that operation's per-level message count.
 *
 * @param opid_cap The operation ID.
 * @param duration The duration to associate with the operation.
 * @param log_us The timestamp of the message.
 * @param level The level of the message.
 * @param sbc The batch whose opid map is updated.
 * @return The iterator returned by insert_op() for this operation.
 */
log_opid_map::iterator
external_log_format::record_opid(string_fragment opid_cap,
                                 std::chrono::microseconds duration,
                                 std::chrono::microseconds log_us,
                                 log_level_t level,
                                 scan_batch_context& sbc)
{
    auto& opids = sbc.sbc_opids;
    auto op_iter = opids.insert_op(sbc.sbc_allocator,
                                   opid_cap,
                                   log_us,
                                   this->lf_timestamp_point_of_reference,
                                   duration);
    auto& level_stats = op_iter->second.otr_level_stats;

    level_stats.update_msg_count(level);

    return op_iter;
}
2115

2116
std::optional<log_opid_map::iterator>
2117
external_log_format::finalize_line(logline& new_line,
7,598✔
2118
                                   line_finalize_inputs& in,
2119
                                   scan_batch_context& sbc)
2120
{
2121
    const auto log_us = new_line.get_time();
7,598✔
2122
    const auto level = new_line.get_msg_level();
7,598✔
2123

2124
    if (!in.lfi_opid_cap && this->elf_opid_field.empty()
14,108✔
2125
        && !in.lfi_opid_desc_frags.empty())
14,108✔
2126
    {
2127
        hasher h;
4,086✔
2128
        for (const auto& frag : in.lfi_opid_desc_frags) {
12,209✔
2129
            h.update(frag);
8,123✔
2130
        }
2131
        h.to_string(in.lfi_synth_opid_buf);
4,086✔
2132
        in.lfi_opid_cap = string_fragment::from_bytes(
4,086✔
2133
            in.lfi_synth_opid_buf, sizeof(in.lfi_synth_opid_buf) - 1);
4,086✔
2134
    }
2135
    if (!in.lfi_opid_cap && (in.lfi_duration_cap || in.lfi_start_ts_cap)) {
7,598✔
2136
        hasher h;
193✔
2137
        h.update(in.lfi_line_sf);
193✔
2138
        h.to_string(in.lfi_synth_opid_buf);
193✔
2139
        in.lfi_opid_cap = string_fragment::from_bytes(
193✔
2140
            in.lfi_synth_opid_buf, sizeof(in.lfi_synth_opid_buf) - 1);
193✔
2141
    }
2142

2143
    std::optional<log_opid_map::iterator> opid_iter;
7,598✔
2144
    if (in.lfi_opid_cap && !in.lfi_opid_cap->empty()) {
7,598✔
2145
        auto duration = std::chrono::microseconds::zero();
5,367✔
2146
        if (in.lfi_duration_cap) {
5,367✔
2147
            auto from_res
2148
                = humanize::try_from<double>(in.lfi_duration_cap.value());
231✔
2149
            if (from_res) {
231✔
2150
                auto dur_secs = from_res->value / this->elf_duration_divisor;
231✔
2151
                duration = std::chrono::microseconds(
462✔
2152
                    static_cast<int64_t>(dur_secs * 1000000));
231✔
2153
            }
2154
        } else if (in.lfi_start_ts_cap) {
5,136✔
2155
            exttm start_tm;
×
2156
            timeval start_tv;
2157
            auto dts = this->build_time_scanner();
×
2158
            if (dts.scan(in.lfi_start_ts_cap->data(),
×
2159
                         in.lfi_start_ts_cap->length(),
×
2160
                         this->get_timestamp_formats(),
2161
                         &start_tm,
2162
                         start_tv))
2163
            {
2164
                auto start_us = to_us(start_tv);
×
2165
                if (log_us > start_us) {
×
2166
                    duration = log_us - start_us;
×
2167
                }
2168
            }
2169
        }
2170

2171
        opid_iter = this->record_opid(
×
2172
            in.lfi_opid_cap.value(), duration, log_us, level, sbc);
5,367✔
2173
        new_line.merge_bloom_bits(in.lfi_opid_cap->bloom_bits());
5,367✔
2174
    }
2175

2176
    if (this->elf_thread_id_field.empty()) {
7,598✔
2177
        if (in.lfi_terminated) {
4,461✔
2178
            auto tid_iter = sbc.sbc_tids.insert_tid(
4,459✔
NEW
2179
                sbc.sbc_allocator, string_fragment{}, log_us);
×
2180
            tid_iter->second.titr_level_stats.update_msg_count(level);
4,459✔
2181
        }
2182
    } else if (in.lfi_tid_cap) {
3,137✔
2183
        auto tid_iter = sbc.sbc_tids.insert_tid(
3,396✔
2184
            sbc.sbc_allocator, in.lfi_tid_cap.value(), log_us);
1,698✔
2185
        tid_iter->second.titr_level_stats.update_msg_count(level);
1,698✔
2186
        new_line.merge_bloom_bits(in.lfi_tid_cap->bloom_bits());
1,698✔
2187
    }
2188

2189
    if (in.lfi_src_file_cap && in.lfi_src_line_cap) {
7,598✔
2190
        auto h = hasher();
479✔
2191
        h.update(this->get_name().c_str());
479✔
2192
        h.update(in.lfi_src_file_cap.value());
479✔
2193
        h.update(in.lfi_src_line_cap.value());
479✔
2194
        new_line.merge_bloom_bits(h.to_bloom_bits());
479✔
2195
        new_line.set_schema_computed(true);
479✔
2196
    }
2197

2198
    return opid_iter;
15,196✔
2199
}
2200

2201
/**
 * Parse the timestamp text of a line and update the format's timestamp
 * state.
 *
 * The text is first scanned with the date-time scanner as currently locked.
 * If that fails, the scanner is unlocked and the scan retried; a second
 * failure restores the previous lock.  A successful retry is only accepted
 * when it sets the same DATE_TIME_SET_FLAGS as the format's previous
 * timestamp flags.
 *
 * After a successful parse, the flags are saved on the format (and on the
 * last pattern-lock line, if any).  If the timestamp has no subsecond
 * component and the previous line falls in the same second with a non-zero
 * millisecond part, the previous line's subsecond time is carried over.
 * Finally, timestamps lacking a full day/month/year are passed to
 * check_for_new_year().
 *
 * @param ts_sf The timestamp text.
 * @param lf The file being scanned, used only for logging; may be null.
 * @param dst The lines scanned so far.
 * @param log_time_tm Receives the broken-down time.
 * @param log_tv Receives the time as a timeval.
 * @param sbc The batch whose pattern-lock record is updated.
 * @return ok, no_parse, or relock_mismatch.
 */
external_log_format::timestamp_outcome
external_log_format::ingest_timestamp(string_fragment ts_sf,
                                      const logfile* lf,
                                      std::vector<logline>& dst,
                                      exttm& log_time_tm,
                                      timeval& log_tv,
                                      scan_batch_context& sbc)
{
    const auto scan_ts = [&]() -> bool {
        const char* scan_end
            = this->lf_date_time.scan(ts_sf.data(),
                                      ts_sf.length(),
                                      this->get_timestamp_formats(),
                                      &log_time_tm,
                                      log_tv);
        return scan_end != nullptr;
    };

    if (!scan_ts()) {
        auto saved_lock = this->lf_date_time.unlock();

        if (!scan_ts()) {
            this->lf_date_time.relock(saved_lock);
            return timestamp_outcome::no_parse;
        }

        const auto prev_set = this->lf_timestamp_flags & DATE_TIME_SET_FLAGS;
        const auto curr_set = log_time_tm.et_flags & DATE_TIME_SET_FLAGS;
        if (curr_set != prev_set) {
            return timestamp_outcome::relock_mismatch;
        }
        if (lf != nullptr) {
            log_debug("%s:%zu: date-time re-locked to %d",
                      lf->get_unique_path().c_str(),
                      dst.size(),
                      this->lf_date_time.dts_fmt_lock);
        }
    }

    this->lf_timestamp_flags = log_time_tm.et_flags;

    auto& lock_lines = sbc.sbc_pattern_locks.pl_lines;
    if (!lock_lines.empty()) {
        lock_lines.back().pfl_timestamp_flags = log_time_tm.et_flags;
    }

    const bool has_subseconds = (this->lf_timestamp_flags
                                 & (ETF_MILLIS_SET | ETF_MICROS_SET
                                    | ETF_NANOS_SET));
    if (!has_subseconds && !dst.empty()) {
        auto& prev_line = dst.back();

        if (prev_line.get_time<std::chrono::seconds>().count()
                == log_tv.tv_sec
            && prev_line.get_subsecond_time<std::chrono::milliseconds>()
                    .count()
                != 0)
        {
            // same second as the previous line: reuse its subsecond time
            auto prev_subsec
                = prev_line.get_subsecond_time<std::chrono::microseconds>();

            log_time_tm.et_nsec
                = std::chrono::duration_cast<std::chrono::nanoseconds>(
                      prev_subsec)
                      .count();
            log_tv.tv_usec
                = std::chrono::duration_cast<std::chrono::microseconds>(
                      prev_subsec)
                      .count();
        }
    }

    if (!(log_time_tm.et_flags & ETF_DAY_SET)
        || !(log_time_tm.et_flags & ETF_MONTH_SET)
        || !(log_time_tm.et_flags & ETF_YEAR_SET))
    {
        this->check_for_new_year(dst, log_time_tm, log_tv);
    }

    return timestamp_outcome::ok;
}
2270

2271
// Returns the canonical text of a CSV cell.  separated_string's
2272
// iterator preserves embedded `""` escape pairs verbatim; this
2273
// collapses them back to a single `"` when present.  The fast path
2274
// returns `field_sf` unchanged so cells without an embedded quote
2275
// stay zero-copy; only cells that actually need unescaping get
2276
// allocated out of `alloc`.
2277
template<typename A>
2278
static string_fragment
2279
unescape_csv_cell(string_fragment field_sf, A alloc)
24,788✔
2280
{
2281
    if (!field_sf.find('"').has_value()) {
24,788✔
2282
        return field_sf;
24,158✔
2283
    }
2284
    return string_fragment::from_str(
1,260✔
2285
               separated_string::unescape_quoted(field_sf))
2286
        .to_owned(alloc);
630✔
2287
}
2288

2289
log_format::scan_result_t
2290
external_log_format::scan_tabular(logfile& lf,
14,864✔
2291
                                  std::vector<logline>& dst,
2292
                                  const line_info& li,
2293
                                  shared_buffer_ref& sbr,
2294
                                  scan_batch_context& sbc)
2295
{
2296
    auto sf = sbr.to_string_fragment();
14,864✔
2297
    if (this->lf_specialized) {
14,864✔
2298
        auto& ll = dst.back();
108✔
2299
        if (li.li_file_range.fr_offset < this->tlf_header_end) {
108✔
2300
            // ignore header
2301
            ll.set_ignore(true);
22✔
2302
            return scan_match{1000};
22✔
2303
        }
2304

2305
        auto ss = separated_string(sf);
86✔
2306
        ss.ss_resume = std::exchange(this->tlf_suspended_state, std::nullopt);
86✔
2307
        if (ss.ss_resume.has_value()) {
86✔
2308
            ll.set_continued(true);
6✔
2309
            ll.set_ignore(!this->jlf_line_format.empty());
6✔
2310
        } else {
2311
            this->tlf_sub_lines = 1 + this->jlf_line_format_init_count;
80✔
2312
        }
2313
        ss.ss_separator = this->tlf_separator;
86✔
2314
        ss.ss_expected_count = this->elf_value_def_read_order.size();
86✔
2315
        line_finalize_inputs lfi;
86✔
2316
        lfi.lfi_line_sf = sf;
86✔
2317
        // Gather the column-name set used for opid synthesis when the format
2318
        // declares no explicit opid field but does declare an opid description.
2319
        std::set<intern_string_t> opid_desc_field_names;
86✔
2320
        if (!this->lf_opid_description_def->empty()) {
86✔
2321
            const auto& opid_def
2322
                = this->lf_opid_description_def->begin()->second;
4✔
2323
            for (const auto& desc : *opid_def.od_descriptors) {
12✔
2324
                opid_desc_field_names.insert(desc.od_field.pp_value);
8✔
2325
            }
2326
        }
2327
        size_t last_value_index = 0;
86✔
2328
        for (auto ss_iter = ss.begin(); ss_iter != ss.end(); ++ss_iter) {
743✔
2329
            auto field_sf = *ss_iter;
657✔
2330
            if (ss_iter.unterminated_quote()) {
657✔
2331
                this->tlf_suspended_state = ss_iter.suspend();
6✔
2332
                lfi.lfi_terminated = false;
6✔
2333
            }
2334
            const auto value_index = ss_iter.index();
657✔
2335
            last_value_index = value_index;
657✔
2336
            if (value_index >= this->elf_value_def_read_order.size()) {
657✔
2337
                break;
×
2338
            }
2339
            const auto* vd = this->elf_value_def_read_order[value_index].second;
657✔
2340
            if (vd == nullptr) {
657✔
2341
                continue;
48✔
2342
            }
2343

2344
            if (vd->vd_meta.lvm_values_index) {
609✔
2345
                auto& lvs
2346
                    = sbc.sbc_value_stats[vd->vd_meta.lvm_values_index.value()];
609✔
2347
                if (field_sf.length() > lvs.lvs_width) {
609✔
2348
                    lvs.lvs_width = field_sf.length();
214✔
2349
                }
2350
                if (vd->vd_meta.lvm_identifier && !field_sf.empty()) {
609✔
2351
                    lvs.add_text(field_sf);
209✔
2352
                }
2353
            }
2354
            // CSV cells may carry a `""`-escaped double-quote literal;
2355
            // collapse those before any downstream comparison or
2356
            // conversion so e.g. a level field of `"WARN"` matches and
2357
            // an opid of `"a""b"` becomes the canonical `a"b`.
2358
            auto canon_sf = unescape_csv_cell(field_sf, sbc.sbc_allocator);
609✔
2359
            if (vd->vd_meta.lvm_name == this->lf_timestamp_field) {
609✔
2360
                exttm log_time_tm;
80✔
2361
                timeval log_tv;
2362
                if (this->ingest_timestamp(
80✔
2363
                        canon_sf, &lf, dst, log_time_tm, log_tv, sbc)
2364
                    == timestamp_outcome::ok)
80✔
2365
                {
2366
                    ll.set_time(log_tv);
80✔
2367
                } else {
2368
                    ll.set_level(LEVEL_INVALID);
×
2369
                }
2370
            } else if (vd->vd_meta.lvm_name == this->elf_level_field) {
529✔
2371
                ll.set_level(this->convert_level(canon_sf, &sbc));
54✔
2372
            } else if (vd->vd_meta.lvm_name == this->elf_opid_field) {
475✔
2373
                if (!canon_sf.empty() && !canon_sf.is_one_of("-", "--")) {
50✔
2374
                    lfi.lfi_opid_cap = canon_sf;
43✔
2375
                }
2376
            } else if (vd->vd_meta.lvm_name == this->elf_thread_id_field) {
425✔
2377
                if (!canon_sf.empty() && !canon_sf.is_one_of("-", "--")) {
41✔
2378
                    lfi.lfi_tid_cap = canon_sf;
34✔
2379
                }
2380
            } else if (vd->vd_meta.lvm_name == this->elf_duration_field) {
384✔
2381
                if (!canon_sf.empty()) {
45✔
2382
                    lfi.lfi_duration_cap = canon_sf;
45✔
2383
                }
2384
            } else if (vd->vd_meta.lvm_name == this->lf_start_timestamp_field) {
339✔
2385
                if (!canon_sf.empty()) {
×
2386
                    lfi.lfi_start_ts_cap = canon_sf;
×
2387
                }
2388
            } else if (vd->vd_meta.lvm_name == this->elf_src_file_field) {
339✔
2389
                if (!canon_sf.empty()) {
41✔
2390
                    lfi.lfi_src_file_cap = canon_sf;
41✔
2391
                }
2392
            } else if (vd->vd_meta.lvm_name == this->elf_src_line_field) {
298✔
2393
                if (!canon_sf.empty()) {
41✔
2394
                    lfi.lfi_src_line_cap = canon_sf;
41✔
2395
                }
2396
            } else {
2397
                ingest_numeric_value(*vd, canon_sf, std::nullopt, sbc);
257✔
2398
            }
2399
            if (!canon_sf.empty()
609✔
2400
                && opid_desc_field_names.count(vd->vd_meta.lvm_name) > 0)
609✔
2401
            {
2402
                lfi.lfi_opid_desc_frags.push_back(canon_sf);
8✔
2403
            }
2404
        }
2405
        if (this->tlf_suspended_state) {
86✔
2406
            this->tlf_sub_lines += 1;
6✔
2407
        } else {
2408
            last_value_index += 1;
80✔
2409
        }
2410
        if (last_value_index >= this->elf_value_def_read_order.size()
86✔
2411
            || this->tlf_suspended_state.has_value())
86✔
2412
        {
2413
            this->finalize_line(dst.back(), lfi, sbc);
86✔
2414
        } else if (sf.startswith("#")) {
×
2415
            ll.set_ignore(true);
×
2416
        } else {
2417
            ll.set_level(LEVEL_INVALID);
×
2418
        }
2419

2420
        if (!this->jlf_line_format.empty()) {
86✔
2421
            static const intern_string_t body_name
2422
                = intern_string::lookup("body", -1);
37✔
2423
            size_t lpc = 0;
37✔
2424
            if (ss.ss_resume) {
37✔
2425
                lpc = ss.ss_resume->rs_index;
2✔
2426
            }
2427
            for (; lpc < this->elf_value_def_read_order.size()
254✔
2428
                 && lpc < last_value_index;
254✔
2429
                 lpc++)
2430
            {
2431
                const auto* vd = this->elf_value_def_read_order[lpc].second;
217✔
2432
                if (vd == nullptr) {
217✔
2433
                    if (!this->jlf_hide_extra) {
48✔
2434
                        this->tlf_sub_lines += 1;
48✔
2435
                    }
2436
                    continue;
48✔
2437
                }
2438
                if (!vd->vd_meta.is_hidden() && !vd->vd_line_format_index
303✔
2439
                    && vd->vd_meta.lvm_name != body_name)
303✔
2440
                {
2441
                    this->tlf_sub_lines += 1;
64✔
2442
                }
2443
            }
2444
            if (!this->tlf_suspended_state) {
37✔
2445
                auto sub_ll = ll.clone();
35✔
2446
                sub_ll.set_continued(true);
35✔
2447
                sub_ll.set_ignore(false);
35✔
2448
                for (int lpc = 1; lpc < this->tlf_sub_lines; lpc++) {
149✔
2449
                    sub_ll.set_sub_offset(lpc);
114✔
2450
                    dst.emplace_back(std::move(sub_ll));
114✔
2451
                }
2452
            }
2453
        }
2454

2455
        return scan_match{1000};
86✔
2456
    }
86✔
2457
    if (dst.size() > 20) {
14,756✔
2458
        return scan_no_match{"no tabular header found"};
8,878✔
2459
    }
2460

2461
    enum class tabular_header_state : uint8_t {
2462
        reading_metadata,
2463
        have_column_header,
2464
    };
2465

2466
    auto header_state = tabular_header_state::reading_metadata;
5,878✔
2467
    std::optional<char> sep;
5,878✔
2468
    for (auto ll_iter = lf.begin(); ll_iter != lf.end(); ++ll_iter) {
10,364✔
2469
        if (ll_iter->get_sub_offset() != 0) {
10,039✔
NEW
2470
            continue;
×
2471
        }
2472
        auto read_res = lf.read_raw_message(ll_iter);
10,039✔
2473
        if (read_res.isErr()) {
10,039✔
2474
            return scan_no_match{"cannot read header"};
×
2475
        }
2476

2477
        auto hdr_sbr = read_res.unwrap();
10,039✔
2478
        auto hdr_sf = hdr_sbr.to_string_fragment();
10,039✔
2479
        if (header_state == tabular_header_state::reading_metadata) {
10,039✔
2480
            // Excel-flavor CSVs sometimes start with `sep=<ch>` to
2481
            // hint the delimiter.  Consume that as metadata and wait
2482
            // for the real header on the next line.
2483
            if (hdr_sf.startswith("sep=")) {
10,039✔
2484
                const auto sep_sf = hdr_sf.substr(4);
5✔
2485
                if (sep_sf.empty()) {
5✔
2486
                    return scan_error{"sep= hint missing separator character"};
×
2487
                }
2488
                sep = sep_sf.data()[0];
5✔
2489
                log_info("  %ld:found 'sep=' header: %x",
10✔
2490
                         std::distance(lf.begin(), ll_iter),
2491
                         sep.value());
2492
            } else if (hdr_sf.startswith("#")) {
10,034✔
2493
                log_info("  %ld:comment header -- %.*s",
8,962✔
2494
                         std::distance(lf.begin(), ll_iter),
2495
                         hdr_sf.length(),
2496
                         hdr_sf.data());
2497
            } else {
2498
                if (!sep) {
5,553✔
2499
                    sep = separated_string::detect_separator(sf);
5,549✔
2500
                }
2501
                if (!sep) {
5,553✔
2502
                    return scan_no_match{"no separator found"};
3,191✔
2503
                }
2504

2505
                auto ss = separated_string(sf);
2,362✔
2506
                ss.ss_separator = sep.value();
2,362✔
2507
                uint32_t hits = 0, misses = 0;
2,362✔
2508

2509
                this->elf_value_def_read_order.clear();
2,362✔
2510
                for (auto hdr_name : ss) {
26,533✔
2511
                    // Header cells may be CSV-quoted (e.g. an export
2512
                    // wrapping a name that contains the separator or
2513
                    // doubled quotes); collapse `""` back to `"` so the
2514
                    // lookup matches what the format declares.
2515
                    auto canon_hdr
2516
                        = unescape_csv_cell(hdr_name, this->elf_allocator);
24,171✔
2517
                    auto value_iter
2518
                        = this->elf_value_def_frag_map.find(canon_hdr);
24,171✔
2519
                    if (value_iter != this->elf_value_def_frag_map.end()) {
24,171✔
2520
                        hits += 1;
158✔
2521
                        this->elf_value_def_read_order.emplace_back(
316✔
2522
                            value_iter->first, value_iter->second);
158✔
2523
                    } else {
2524
                        misses += 1;
24,013✔
2525
                        auto owned_name
2526
                            = canon_hdr.to_owned(this->elf_allocator);
24,013✔
2527
                        this->elf_value_def_read_order.emplace_back(owned_name,
24,013✔
2528
                                                                    nullptr);
24,013✔
2529
                    }
2530
                }
2531
                if (hits <= 2) {
2,362✔
2532
                    return scan_no_match{"not enough columns matched"};
2,340✔
2533
                }
2534

2535
                for (auto prev_iter = lf.begin(); prev_iter != ll_iter;
34✔
2536
                     ++prev_iter)
12✔
2537
                {
2538
                    prev_iter->set_time(std::chrono::microseconds::zero());
12✔
2539
                    prev_iter->set_level(LEVEL_UNKNOWN);
12✔
2540
                    prev_iter->set_ignore(true);
12✔
2541
                }
2542

2543
                log_info("  %ld:found column header -- %.*s",
44✔
2544
                         std::distance(lf.begin(), ll_iter),
2545
                         hdr_sf.length(),
2546
                         hdr_sf.data());
2547

2548
                header_state = tabular_header_state::have_column_header;
22✔
2549
                ll_iter->set_ignore(true);
22✔
2550
                ll_iter->set_level(LEVEL_INVALID);
22✔
2551
                this->tlf_separator = sep.value();
22✔
2552
                this->tlf_header_end = li.li_file_range.next_offset();
22✔
2553
                this->tlf_extra_count = misses;
22✔
2554
                this->tlf_suspended_state = std::nullopt;
22✔
2555
                return scan_match{1000, misses, hits};
22✔
2556
            }
2557
        }
2558
    }
15,592✔
2559

2560
    return scan_no_match{"no tabular header found"};
325✔
2561
}
2562

2563
log_format::scan_result_t
2564
external_log_format::scan(logfile& lf,
981,634✔
2565
                          std::vector<logline>& dst,
2566
                          const line_info& li,
2567
                          shared_buffer_ref& sbr,
2568
                          scan_batch_context& sbc)
2569
{
2570
    if (dst.size() == 1) {
981,634✔
2571
        auto file_options = lf.get_file_options();
106,667✔
2572

2573
        if (file_options) {
106,667✔
2574
            this->lf_date_time.dts_default_zone
2575
                = file_options->second.fo_default_zone.pp_value;
4,320✔
2576
        } else {
2577
            this->lf_date_time.dts_default_zone = nullptr;
102,347✔
2578
        }
2579
    }
106,667✔
2580

2581
    sbc.sbc_value_stats.resize(this->elf_value_defs.size());
981,634✔
2582

2583
    if (this->elf_type == elf_type_t::ELF_TYPE_TABULAR) {
981,634✔
2584
        return this->scan_tabular(lf, dst, li, sbr, sbc);
14,864✔
2585
    }
2586

2587
    if (this->elf_type == elf_type_t::ELF_TYPE_JSON) {
966,770✔
2588
        return this->scan_json(dst, li, sbr, sbc);
110,237✔
2589
    }
2590

2591
    int curr_fmt = -1, orig_lock = sbc.sbc_pattern_locks.last_pattern_index();
856,533✔
2592
    int pat_index = orig_lock;
856,533✔
2593
    auto line_sf = sbr.to_string_fragment();
856,533✔
2594
    thread_local auto md = lnav::pcre2pp::match_data::unitialized();
856,533✔
2595

2596
    while (::next_format(this->elf_pattern_order, curr_fmt, pat_index)) {
2,806,112✔
2597
        auto* fpat = this->elf_pattern_order[curr_fmt].get();
1,957,091✔
2598
        auto* pat = fpat->p_pcre.pp_value.get();
1,957,091✔
2599

2600
        auto found_match
2601
            = pat->capture_from(line_sf).into(md).found_p(PCRE2_NO_UTF_CHECK);
1,957,091✔
2602
        if (!found_match) {
1,957,091✔
2603
            if (!sbc.sbc_pattern_locks.empty() && pat_index != -1) {
1,949,576✔
2604
                curr_fmt = -1;
2,050✔
2605
                pat_index = -1;
2,050✔
2606
            }
2607
            continue;
1,949,579✔
2608
        }
2609

2610
        auto pushed_pattern_lock = false;
7,515✔
2611
        if (orig_lock != curr_fmt) {
7,515✔
2612
            uint32_t lock_line;
2613

2614
            if (!this->lf_specialized && orig_lock != -1) {
730✔
2615
                log_debug("%s:%zu: changing pattern lock %d -> (%d)%s",
×
2616
                          lf.get_unique_path().c_str(),
2617
                          dst.size() - 1,
2618
                          orig_lock,
2619
                          curr_fmt,
2620
                          this->elf_pattern_order[curr_fmt]->p_name.c_str());
2621
            }
2622
            if (sbc.sbc_pattern_locks.empty()) {
730✔
2623
                lock_line = 0;
704✔
2624
            } else {
2625
                lock_line = dst.size() - 1;
26✔
2626
            }
2627
            sbc.sbc_pattern_locks.pl_lines.emplace_back(lock_line, curr_fmt);
730✔
2628
            pushed_pattern_lock = true;
730✔
2629
        }
2630

2631
        auto ts = md[fpat->p_timestamp_field_index];
7,515✔
2632
        auto level_cap = md[fpat->p_level_field_index];
7,515✔
2633
        auto opid_cap = md[fpat->p_opid_field_index];
7,515✔
2634
        exttm log_time_tm;
7,515✔
2635
        timeval log_tv;
2636
        char combined_datetime_buf[512];
2637

2638
        if (fpat->p_time_field_index != -1) {
7,515✔
2639
            auto time_cap = md[fpat->p_time_field_index];
×
2640
            if (ts && time_cap) {
×
2641
                auto ts_str_len = snprintf(combined_datetime_buf,
×
2642
                                           sizeof(combined_datetime_buf),
2643
                                           "%.*sT%.*s",
2644
                                           ts->length(),
2645
                                           ts->data(),
2646
                                           time_cap->length(),
2647
                                           time_cap->data());
2648
                ts = string_fragment::from_bytes(combined_datetime_buf,
×
2649
                                                 ts_str_len);
×
2650
            }
2651
        }
2652

2653
        auto level = this->convert_level(
7,515✔
2654
            level_cap.value_or(string_fragment::invalid()), &sbc);
7,515✔
2655

2656
        if (!ts) {
7,515✔
2657
            level = log_level_t::LEVEL_INVALID;
×
2658
        } else {
2659
            auto outcome = this->ingest_timestamp(
7,515✔
2660
                *ts, &lf, dst, log_time_tm, log_tv, sbc);
7,515✔
2661
            if (outcome != timestamp_outcome::ok) {
7,515✔
2662
                if (pushed_pattern_lock) {
3✔
2663
                    sbc.sbc_pattern_locks.pl_lines.pop_back();
2✔
2664
                }
2665
                continue;
3✔
2666
            }
2667
        }
2668

2669
        auto log_us = to_us(log_tv);
7,512✔
2670
        for (const auto& ivd : fpat->p_value_by_index) {
77,265✔
2671
            if (!ivd.ivd_value_def->vd_meta.lvm_values_index) {
69,753✔
2672
                continue;
13,078✔
2673
            }
2674

2675
            ssize_t cap_size = md.capture_size(ivd.ivd_index);
56,675✔
2676
            auto& lvs = sbc.sbc_value_stats[ivd.ivd_value_def->vd_meta
56,675✔
2677
                                                .lvm_values_index.value()];
56,675✔
2678

2679
            if (cap_size > lvs.lvs_width) {
56,675✔
2680
                lvs.lvs_width = cap_size;
7,457✔
2681
            }
2682
            // Identifier fields are explicitly excluded from numeric
2683
            // ingest in `ingest_numeric_value`; route them to the
2684
            // distinct-count estimator instead so columns like opid,
2685
            // hostname, request_id surface a useful cardinality.
2686
            if (ivd.ivd_value_def->vd_meta.lvm_identifier) {
56,675✔
2687
                if (auto cap = md[ivd.ivd_index]) {
37,614✔
2688
                    lvs.add_text(*cap);
29,545✔
2689
                }
2690
            }
2691
        }
2692

2693
        for (auto value_index : fpat->p_numeric_value_indexes) {
12,610✔
2694
            const indexed_value_def& ivd = fpat->p_value_by_index[value_index];
5,098✔
2695
            auto num_cap = md[ivd.ivd_index];
5,098✔
2696
            if (!num_cap) {
5,098✔
2697
                continue;
74✔
2698
            }
2699
            auto unit_cap = ivd.ivd_unit_field_index >= 0
5,024✔
2700
                ? md[ivd.ivd_unit_field_index]
5,024✔
2701
                : std::nullopt;
5,024✔
2702
            ingest_numeric_value(*ivd.ivd_value_def, *num_cap, unit_cap, sbc);
5,024✔
2703
        }
2704

2705
        auto& new_line = dst.back();
7,512✔
2706
        new_line.set_time(log_us);
7,512✔
2707
        new_line.set_level(level);
7,512✔
2708

2709
        line_finalize_inputs lfi;
7,512✔
2710
        lfi.lfi_opid_cap = opid_cap;
7,512✔
2711
        lfi.lfi_tid_cap = md[fpat->p_thread_id_field_index];
7,512✔
2712
        lfi.lfi_duration_cap = md[fpat->p_duration_field_index];
7,512✔
2713
        lfi.lfi_start_ts_cap = md[fpat->p_start_timestamp_field_index];
7,512✔
2714
        lfi.lfi_src_file_cap = md[fpat->p_src_file_field_index];
7,512✔
2715
        lfi.lfi_src_line_cap = md[fpat->p_src_line_field_index];
7,512✔
2716
        lfi.lfi_line_sf = line_sf;
7,512✔
2717
        for (const auto& fidx : fpat->p_opid_description_field_indexes) {
17,049✔
2718
            auto desc_cap = md[fidx];
9,537✔
2719
            if (desc_cap) {
9,537✔
2720
                lfi.lfi_opid_desc_frags.push_back(desc_cap.value());
9,488✔
2721
            }
2722
        }
2723
        auto opid_iter_opt = this->finalize_line(new_line, lfi, sbc);
7,512✔
2724
        if (opid_iter_opt) {
7,512✔
2725
            auto opid_iter = *opid_iter_opt;
5,313✔
2726
            auto& otr = opid_iter->second;
5,313✔
2727
            if (fpat->p_subid_field_index != -1) {
5,313✔
2728
                auto subid_cap = md[fpat->p_subid_field_index];
78✔
2729
                if (subid_cap && !subid_cap->empty()) {
78✔
2730
                    auto* ostr = sbc.sbc_opids.sub_op_in_use(sbc.sbc_allocator,
234✔
2731
                                                             opid_iter,
2732
                                                             subid_cap.value(),
78✔
2733
                                                             log_us,
2734
                                                             level);
2735
                    if (ostr != nullptr && ostr->ostr_description.empty()) {
78✔
2736
                        log_op_description sub_desc;
53✔
2737
                        this->update_op_description(
53✔
2738
                            *this->lf_subid_description_def_vec,
53✔
2739
                            sub_desc,
2740
                            fpat,
2741
                            md);
2742
                        if (!sub_desc.lod_elements.empty()) {
53✔
2743
                            auto& sub_desc_def
2744
                                = this->lf_subid_description_def_vec->at(
51✔
2745
                                    sub_desc.lod_index.value());
51✔
2746
                            ostr->ostr_description = sub_desc_def->to_string(
102✔
2747
                                sub_desc.lod_elements);
51✔
2748
                        }
2749
                    }
53✔
2750
                }
2751
            }
2752
            this->update_op_description(*this->lf_opid_description_def_vec,
5,313✔
2753
                                        otr.otr_description,
5,313✔
2754
                                        fpat,
2755
                                        md);
2756
        }
2757
        return scan_match{1000};
7,512✔
2758
    }
7,512✔
2759

2760
    if (this->lf_specialized && !this->lf_multiline) {
849,021✔
2761
        return scan_error{"line does not match any patterns"};
2✔
2762
    }
2763

2764
    return scan_no_match{"no patterns matched"};
849,020✔
2765
}
2766

2767
void
2768
external_log_format::annotate(logfile* lf,
8,075✔
2769
                              uint64_t line_number,
2770
                              string_attrs_t& sa,
2771
                              logline_value_vector& values) const
2772
{
2773
    thread_local auto md = lnav::pcre2pp::match_data::unitialized();
8,075✔
2774

2775
    auto& line = values.lvv_sbr;
8,075✔
2776

2777
    line.erase_ansi();
8,075✔
2778
    if (this->elf_type == elf_type_t::ELF_TYPE_TABULAR
16,150✔
2779
        && this->jlf_line_format.empty())
8,075✔
2780
    {
2781
        auto ll_iter = std::next(lf->begin(), line_number);
46✔
2782

2783
        if (ll_iter->is_continued()) {
46✔
2784
            // XXX read previous lines so we can do a proper annotation
2785
            return;
2✔
2786
        }
2787
        // Tabular format without line-format: get_subline left the row
2788
        // raw, so parse it here directly.
2789
        auto sf = line.to_string_fragment();
44✔
2790
        auto ss = separated_string(sf);
44✔
2791
        ss.ss_separator = this->tlf_separator;
44✔
2792
        for (auto it = ss.begin(); it != ss.end(); ++it) {
480✔
2793
            if (it.index() >= this->elf_value_def_read_order.size()) {
436✔
2794
                break;
×
2795
            }
2796
            this->process_csv_cell(values, &sa, it, line);
436✔
2797
        }
2798

2799
        return;
44✔
2800
    }
2801

2802
    if (this->elf_type != elf_type_t::ELF_TYPE_TEXT) {
8,029✔
2803
        if (this->jlf_cached_opts.full_message) {
914✔
2804
            values = this->jlf_line_values;
372✔
2805
            sa = this->jlf_attr_line.al_attrs;
372✔
2806
        } else {
2807
            values.lvv_sbr = this->jlf_line_values.lvv_sbr.clone();
542✔
2808
            for (const auto& llv : this->jlf_line_values.lvv_values) {
5,505✔
2809
                if (this->jlf_cached_sub_range.contains(llv.lv_origin)) {
4,963✔
2810
                    values.lvv_values.emplace_back(llv);
1,090✔
2811
                    values.lvv_values.back().lv_origin.shift(
1,090✔
2812
                        this->jlf_cached_sub_range.lr_start,
1,090✔
2813
                        -this->jlf_cached_sub_range.lr_start);
1,090✔
2814
                }
2815
            }
2816
            for (const auto& attr : this->jlf_attr_line.al_attrs) {
3,194✔
2817
                if (this->jlf_cached_sub_range.contains(attr.sa_range)) {
2,652✔
2818
                    sa.emplace_back(attr);
853✔
2819
                    sa.back().sa_range.shift(
853✔
2820
                        this->jlf_cached_sub_range.lr_start,
853✔
2821
                        -this->jlf_cached_sub_range.lr_start);
853✔
2822
                }
2823
            }
2824
            values.lvv_opid_value = this->jlf_line_values.lvv_opid_value;
542✔
2825
            values.lvv_opid_provenance
2826
                = this->jlf_line_values.lvv_opid_provenance;
542✔
2827
            values.lvv_thread_id_value
2828
                = to_owned(this->jlf_line_values.lvv_thread_id_value,
542✔
2829
                           values.lvv_allocator);
542✔
2830
            values.lvv_src_file_value = to_owned(
542✔
2831
                this->jlf_line_values.lvv_src_file_value, values.lvv_allocator);
542✔
2832
            values.lvv_src_line_value = to_owned(
542✔
2833
                this->jlf_line_values.lvv_src_line_value, values.lvv_allocator);
542✔
2834
            values.lvv_duration_value
2835
                = this->jlf_line_values.lvv_duration_value;
542✔
2836
        }
2837
        log_format::annotate(lf, line_number, sa, values);
914✔
2838
        return;
914✔
2839
    }
2840

2841
    if (line.empty()) {
7,115✔
2842
        return;
5✔
2843
    }
2844

2845
    values.lvv_values.reserve(this->elf_value_defs.size());
7,110✔
2846

2847
    auto lffs = lf->get_format_file_state();
7,110✔
2848
    int pat_index = lffs.lffs_pattern_locks.pattern_index_for_line(line_number);
7,110✔
2849
    const auto& pat = *this->elf_pattern_order[pat_index];
7,110✔
2850
    char tmp_opid_buf[hasher::STRING_SIZE];
2851

2852
    sa.reserve(pat.p_pcre.pp_value->get_capture_count());
7,110✔
2853
    auto match_res
2854
        = pat.p_pcre.pp_value->capture_from(line.to_string_fragment())
7,110✔
2855
              .into(md)
7,110✔
2856
              .matches(PCRE2_NO_UTF_CHECK)
14,220✔
2857
              .ignore_error();
7,110✔
2858
    if (!match_res) {
7,110✔
2859
        // A continued line still needs a body.
2860
        auto lr = line_range{0, (int) line.length()};
1,666✔
2861
        sa.emplace_back(lr, SA_BODY.value());
1,666✔
2862
        if (!this->lf_multiline) {
1,666✔
2863
            auto len
2864
                = pat.p_pcre.pp_value->match_partial(line.to_string_fragment());
×
2865
            sa.emplace_back(
×
2866
                line_range{(int) len, -1},
×
2867
                SA_INVALID.value("Log line does not match any pattern"));
×
2868
        }
2869
        return;
1,666✔
2870
    }
2871

2872
    auto duration_cap = md[pat.p_duration_field_index];
5,444✔
2873

2874
    auto ts_cap = md[pat.p_timestamp_field_index];
5,444✔
2875
    if (ts_cap) {
5,444✔
2876
        sa.emplace_back(to_line_range(ts_cap.value()), L_TIMESTAMP.value());
5,444✔
2877
        values.lvv_time_value = ts_cap;
5,444✔
2878
    }
2879

2880
    auto opid_cap = md[pat.p_opid_field_index];
5,444✔
2881

2882
    if (this->elf_opid_field.empty()
5,444✔
2883
        && !pat.p_opid_description_field_indexes.empty())
5,444✔
2884
    {
2885
        auto empty_desc = true;
3,807✔
2886
        hasher h;
3,807✔
2887
        for (const auto& fidx : pat.p_opid_description_field_indexes) {
11,421✔
2888
            auto desc_cap = md[fidx];
7,614✔
2889
            if (desc_cap) {
7,614✔
2890
                h.update(desc_cap.value());
7,586✔
2891
                empty_desc = false;
7,586✔
2892
            }
2893
        }
2894
        if (!empty_desc) {
3,807✔
2895
            h.to_string(tmp_opid_buf);
3,807✔
2896
            opid_cap = string_fragment::from_bytes(tmp_opid_buf,
7,614✔
2897
                                                   sizeof(tmp_opid_buf) - 1);
3,807✔
2898
        }
2899
    } else if (duration_cap && !opid_cap) {
1,637✔
2900
        hasher h;
16✔
2901
        h.update(line.to_string_fragment());
16✔
2902
        h.to_string(tmp_opid_buf);
16✔
2903
        opid_cap = string_fragment::from_bytes(tmp_opid_buf,
32✔
2904
                                               sizeof(tmp_opid_buf) - 1);
16✔
2905
    }
2906
    if (opid_cap && !opid_cap->empty()) {
5,444✔
2907
        sa.emplace_back(to_line_range(opid_cap.value()), L_OPID.value());
4,423✔
2908
        values.lvv_opid_value = opid_cap->to_string();
4,423✔
2909
        values.lvv_opid_provenance
2910
            = logline_value_vector::opid_provenance::file;
4,423✔
2911
    }
2912

2913
    auto body_cap = md[pat.p_body_field_index];
5,444✔
2914
    auto level_cap = md[pat.p_level_field_index];
5,444✔
2915
    auto src_file_cap = md[pat.p_src_file_field_index];
5,444✔
2916
    auto src_line_cap = md[pat.p_src_line_field_index];
5,444✔
2917
    auto thread_id_cap = md[pat.p_thread_id_field_index];
5,444✔
2918

2919
    if (level_cap
5,444✔
2920
        && (!body_cap
10,852✔
2921
            || (body_cap && level_cap->sf_begin != body_cap->sf_begin)))
10,852✔
2922
    {
2923
        sa.emplace_back(to_line_range(level_cap.value()), L_LEVEL.value());
4,890✔
2924
    }
2925

2926
    if (src_file_cap) {
5,444✔
2927
        sa.emplace_back(to_line_range(src_file_cap.value()),
312✔
2928
                        SA_SRC_FILE.value());
624✔
2929
        values.lvv_src_file_value
2930
            = src_file_cap->to_owned(values.lvv_allocator);
312✔
2931
    }
2932
    if (src_line_cap) {
5,444✔
2933
        sa.emplace_back(to_line_range(src_line_cap.value()),
312✔
2934
                        SA_SRC_LINE.value());
624✔
2935
        values.lvv_src_line_value
2936
            = src_line_cap->to_owned(values.lvv_allocator);
312✔
2937
    }
2938
    if (thread_id_cap) {
5,444✔
2939
        sa.emplace_back(to_line_range(thread_id_cap.value()),
988✔
2940
                        SA_THREAD_ID.value());
1,976✔
2941
        values.lvv_thread_id_value
2942
            = thread_id_cap->to_owned(values.lvv_allocator);
988✔
2943
    }
2944
    if (duration_cap) {
5,444✔
2945
        sa.emplace_back(to_line_range(duration_cap.value()),
16✔
2946
                        SA_DURATION.value());
32✔
2947
        auto from_res = humanize::try_from<double>(duration_cap.value());
16✔
2948
        if (from_res) {
16✔
2949
            auto dur_secs = from_res->value / this->elf_duration_divisor;
16✔
2950
            auto duration = std::chrono::microseconds(
2951
                static_cast<int64_t>(dur_secs * 1000000));
16✔
2952
            values.lvv_duration_value = duration;
16✔
2953
        }
2954
    }
2955
    if (!values.lvv_duration_value) {
5,444✔
2956
        auto start_ts_cap = md[pat.p_start_timestamp_field_index];
5,428✔
2957
        if (start_ts_cap) {
5,428✔
2958
            exttm start_tm;
×
2959
            timeval start_tv;
2960
            auto dts = this->build_time_scanner();
×
2961
            if (dts.scan(start_ts_cap->data(),
×
2962
                         start_ts_cap->length(),
×
2963
                         this->get_timestamp_formats(),
2964
                         &start_tm,
2965
                         start_tv))
2966
            {
2967
                auto start_us = to_us(start_tv);
×
2968
                auto end_us = (*lf)[line_number].get_time<>();
×
2969
                if (end_us > start_us) {
×
2970
                    values.lvv_duration_value = end_us - start_us;
×
2971
                }
2972
            }
2973
        }
2974
    }
2975

2976
    for (size_t lpc = 0; lpc < pat.p_value_by_index.size(); lpc++) {
61,238✔
2977
        const auto& ivd = pat.p_value_by_index[lpc];
55,794✔
2978
        const scaling_factor* scaling = nullptr;
55,794✔
2979
        auto cap = md[ivd.ivd_index];
55,794✔
2980
        const auto& vd = *ivd.ivd_value_def;
55,794✔
2981

2982
        if (ivd.ivd_unit_field_index >= 0) {
55,794✔
2983
            auto unit_cap = md[ivd.ivd_unit_field_index];
×
2984

2985
            if (unit_cap) {
×
2986
                intern_string_t unit_val
2987
                    = intern_string::lookup(unit_cap.value());
×
2988
                auto unit_iter = vd.vd_unit_scaling.find(unit_val);
×
2989
                if (unit_iter != vd.vd_unit_scaling.end()) {
×
2990
                    const auto& sf = unit_iter->second;
×
2991

2992
                    scaling = &sf;
×
2993
                }
2994
            }
2995
        }
2996

2997
        if (cap) {
55,794✔
2998
            if (vd.vd_meta.lvm_kind == value_kind_t::VALUE_TIMESTAMP) {
45,655✔
2999
                auto dts = this->build_time_scanner();
7✔
3000
                exttm tm;
7✔
3001
                timeval tv;
3002
                auto val_sf = cap.value();
7✔
3003

3004
                if (dts.scan(val_sf.data(),
14✔
3005
                             val_sf.length(),
7✔
3006
                             this->get_timestamp_formats(),
3007
                             &tm,
3008
                             tv,
3009
                             true))
3010
                {
3011
                    char ts[64];
3012
                    tm.et_gmtoff = tm.et_orig_gmtoff;
7✔
3013
                    auto len = dts.ftime(
7✔
3014
                        ts, sizeof(ts), this->get_timestamp_formats(), tm);
3015
                    ts[len] = '\0';
7✔
3016
                    values.lvv_values.emplace_back(vd.vd_meta,
7✔
3017
                                                   std::string{ts, len});
21✔
3018
                    values.lvv_values.back().lv_origin
7✔
3019
                        = to_line_range(cap.value());
14✔
3020
                } else {
3021
                    values.lvv_values.emplace_back(
×
3022
                        vd.vd_meta, line, to_line_range(cap.value()));
×
3023
                }
3024
            } else {
3025
                values.lvv_values.emplace_back(
91,296✔
3026
                    vd.vd_meta, line, to_line_range(cap.value()));
45,648✔
3027
                values.lvv_values.back().apply_scaling(scaling);
45,648✔
3028
            }
3029
        } else {
3030
            values.lvv_values.emplace_back(vd.vd_meta);
10,139✔
3031
        }
3032
    }
3033

3034
    line_range lr;
5,444✔
3035
    if (body_cap && body_cap->is_valid()) {
5,444✔
3036
        lr = to_line_range(body_cap.value());
5,432✔
3037
    } else {
3038
        lr.lr_start = line.length();
12✔
3039
        lr.lr_end = line.length();
12✔
3040
    }
3041
    sa.emplace_back(lr, SA_BODY.value());
5,444✔
3042

3043
    log_format::annotate(lf, line_number, sa, values);
5,444✔
3044
}
3045

3046
void
3047
external_log_format::rewrite(exec_context& ec,
43✔
3048
                             shared_buffer_ref& line,
3049
                             string_attrs_t& sa,
3050
                             std::string& value_out)
3051
{
3052
    auto pg = ec.with_provenance(exec_context::format_rewrite{});
43✔
3053
    auto& values = *ec.ec_line_values;
43✔
3054

3055
    value_out.assign(line.get_data(), line.length());
43✔
3056

3057
    for (auto iter = values.lvv_values.begin(); iter != values.lvv_values.end();
251✔
3058
         ++iter)
208✔
3059
    {
3060
        if (!iter->lv_origin.is_valid()) {
208✔
3061
            log_debug("%d: not rewriting value with invalid origin -- %s",
22✔
3062
                      (int) ec.ec_top_line,
3063
                      iter->lv_meta.lvm_name.get());
3064
            continue;
178✔
3065
        }
3066

3067
        auto vd_iter = this->elf_value_defs.find(iter->lv_meta.lvm_name);
186✔
3068
        if (vd_iter == this->elf_value_defs.end()) {
186✔
3069
            log_debug("%d: not rewriting undefined value -- %s",
×
3070
                      (int) ec.ec_top_line,
3071
                      iter->lv_meta.lvm_name.get());
3072
            continue;
×
3073
        }
3074

3075
        const auto& vd = *vd_iter->second;
186✔
3076

3077
        if (vd.vd_rewriter.empty()) {
186✔
3078
            continue;
156✔
3079
        }
3080

3081
        auto _sg = ec.enter_source(
3082
            vd_iter->second->vd_rewrite_src_name, 1, vd.vd_rewriter);
30✔
3083
        std::string field_value;
30✔
3084

3085
        auto_mem<FILE> tmpout(fclose);
30✔
3086

3087
        tmpout = std::tmpfile();
30✔
3088
        if (!tmpout) {
30✔
3089
            log_error("unable to create temporary file");
×
3090
            return;
×
3091
        }
3092
        fcntl(fileno(tmpout), F_SETFD, FD_CLOEXEC);
30✔
3093
        auto fd_copy = auto_fd::dup_of(fileno(tmpout));
30✔
3094
        fd_copy.close_on_exec();
30✔
3095
        auto ec_out = std::make_pair(tmpout.release(), fclose);
30✔
3096
        {
3097
            exec_context::output_guard og(ec, "tmp", ec_out);
60✔
3098

3099
            auto exec_res = execute_any(ec, vd.vd_rewriter);
30✔
3100
            if (exec_res.isOk()) {
30✔
3101
                field_value = exec_res.unwrap();
30✔
3102
            } else {
3103
                field_value = exec_res.unwrapErr().to_attr_line().get_string();
×
3104
            }
3105
        }
30✔
3106
        struct stat st;
3107
        fstat(fd_copy.get(), &st);
30✔
3108
        if (st.st_size > 0) {
30✔
3109
            auto buf = auto_buffer::alloc(st.st_size);
2✔
3110

3111
            buf.resize(st.st_size);
2✔
3112
            pread(fd_copy.get(), buf.in(), st.st_size, 0);
2✔
3113
            field_value = buf.to_string();
2✔
3114
        }
2✔
3115
        value_out.erase(iter->lv_origin.lr_start, iter->lv_origin.length());
30✔
3116

3117
        int32_t shift_amount
3118
            = ((int32_t) field_value.length()) - iter->lv_origin.length();
30✔
3119
        auto orig_lr = iter->lv_origin;
30✔
3120
        value_out.insert(iter->lv_origin.lr_start, field_value);
30✔
3121
        for (auto shift_iter = values.lvv_values.begin();
30✔
3122
             shift_iter != values.lvv_values.end();
170✔
3123
             ++shift_iter)
140✔
3124
        {
3125
            shift_iter->lv_origin.shift_range(orig_lr, shift_amount);
140✔
3126
        }
3127
        shift_string_attrs(sa, orig_lr, shift_amount);
30✔
3128
    }
30✔
3129
}
43✔
3130

3131
static int
3132
read_json_field(yajlpp_parse_context* ypc,
234,785✔
3133
                const unsigned char* str,
3134
                size_t len,
3135
                yajl_string_props_t* props)
3136
{
3137
    auto* jlu = (json_log_userdata*) ypc->ypc_userdata;
234,785✔
3138
    timeval tv_out;
3139
    const auto frag = string_fragment::from_bytes(str, len);
234,785✔
3140
    intern_string_t field_name;
234,785✔
3141
    const auto* vd = jlu->get_field_def(ypc);
234,785✔
3142

3143
    if (vd != nullptr) {
234,785✔
3144
        field_name = vd->vd_meta.lvm_name;
24,003✔
3145
    }
3146

3147
    if (field_name.empty()) {
234,785✔
3148
        if (!jlu->jlu_format->elf_opid_field.empty()) {
210,782✔
3149
            auto path_sf = ypc->get_path_as_string_fragment();
71,866✔
3150
            if (path_sf.startswith(jlu->jlu_format->elf_opid_field.c_str())) {
71,866✔
3151
                jlu->jlu_opid_hasher.update(path_sf);
11,157✔
3152
                jlu->jlu_opid_hasher.update(frag);
11,157✔
3153
            }
3154
        }
3155
    } else if (jlu->jlu_format->lf_timestamp_field == field_name) {
24,003✔
3156
        const auto* last = jlu->jlu_format->lf_date_time.scan(
4,712✔
3157
            (const char*) str,
3158
            len,
3159
            jlu->jlu_format->get_timestamp_formats(),
4,712✔
3160
            &jlu->jlu_exttm,
3161
            tv_out);
3162
        if (last == nullptr) {
4,712✔
3163
            auto ls = jlu->jlu_format->lf_date_time.unlock();
41✔
3164
            if ((last = jlu->jlu_format->lf_date_time.scan(
41✔
3165
                     (const char*) str,
3166
                     len,
3167
                     jlu->jlu_format->get_timestamp_formats(),
41✔
3168
                     &jlu->jlu_exttm,
3169
                     tv_out))
3170
                == nullptr)
41✔
3171
            {
3172
                jlu->jlu_format->lf_date_time.relock(ls);
19✔
3173
            }
3174
            if (last != nullptr) {
41✔
3175
                auto old_flags
22✔
3176
                    = jlu->jlu_format->lf_timestamp_flags & DATE_TIME_SET_FLAGS;
22✔
3177
                auto new_flags = jlu->jlu_exttm.et_flags & DATE_TIME_SET_FLAGS;
22✔
3178

3179
                // It is unlikely a valid timestamp would lose much
3180
                // precision.
3181
                if (new_flags != old_flags) {
22✔
3182
                    last = nullptr;
×
3183
                }
3184
            }
3185
        }
3186
        if (last != nullptr) {
4,712✔
3187
            jlu->jlu_format->lf_timestamp_flags = jlu->jlu_exttm.et_flags;
4,693✔
3188
            jlu->jlu_base_line->set_time(tv_out);
4,693✔
3189
        } else {
3190
            jlu->jlu_scan_error = log_format::scan_error{fmt::format(
19✔
3191
                FMT_STRING(
76✔
3192
                    "failed to parse timestamp '{}' in string property '{}'"),
3193
                frag,
3194
                field_name)};
19✔
3195
        }
3196
    } else if (!jlu->jlu_format->lf_start_timestamp_field.empty()
19,291✔
3197
               && jlu->jlu_format->lf_start_timestamp_field == field_name)
19,291✔
3198
    {
3199
        exttm start_tm;
3✔
3200
        timeval start_tv;
3201
        const auto* last = jlu->jlu_format->lf_date_time.scan(
3✔
3202
            (const char*) str,
3203
            len,
3204
            jlu->jlu_format->get_timestamp_formats(),
3✔
3205
            &start_tm,
3206
            start_tv);
3207
        if (last != nullptr) {
3✔
3208
            jlu->jlu_start_time = to_us(start_tv);
3✔
3209
        }
3210
    } else if (jlu->jlu_format->elf_level_pointer.pp_value != nullptr) {
19,288✔
3211
        if (jlu->jlu_format->elf_level_pointer.pp_value
240✔
3212
                ->find_in(field_name.to_string_fragment(), PCRE2_NO_UTF_CHECK)
240✔
3213
                .ignore_error()
240✔
3214
                .has_value())
120✔
3215
        {
3216
            jlu->jlu_base_line->set_level(
×
3217
                jlu->jlu_format->convert_level(frag, jlu->jlu_batch_context));
×
3218
        }
3219
    }
3220
    if (!field_name.empty() && jlu->jlu_format->elf_level_field == field_name) {
234,785✔
3221
        jlu->jlu_base_line->set_level(
4,923✔
3222
            jlu->jlu_format->convert_level(frag, jlu->jlu_batch_context));
4,923✔
3223
    }
3224
    if (!field_name.empty() && jlu->jlu_format->elf_opid_field == field_name) {
234,785✔
3225
        jlu->jlu_base_line->merge_bloom_bits(frag.bloom_bits());
1,144✔
3226

3227
        auto& sbc = *jlu->jlu_batch_context;
1,144✔
3228
        auto opid_iter = sbc.sbc_opids.los_opid_ranges.find(frag);
1,144✔
3229
        if (opid_iter == sbc.sbc_opids.los_opid_ranges.end()) {
1,144✔
3230
            jlu->jlu_opid_frag = frag.to_owned(sbc.sbc_allocator);
1,114✔
3231
        } else {
3232
            jlu->jlu_opid_frag = opid_iter->first;
30✔
3233
        }
3234
    }
3235
    if (!field_name.empty()
234,785✔
3236
        && jlu->jlu_format->elf_thread_id_field == field_name)
234,785✔
3237
    {
3238
        auto& sbc = *jlu->jlu_batch_context;
×
3239
        auto tid_iter = sbc.sbc_tids.ltis_tid_ranges.find(frag);
×
3240
        if (tid_iter == sbc.sbc_tids.ltis_tid_ranges.end()) {
×
3241
            jlu->jlu_tid_frag = frag.to_owned(sbc.sbc_allocator);
×
3242
        } else {
3243
            jlu->jlu_tid_frag = tid_iter->first;
×
3244
        }
3245
    }
3246
    if (!jlu->jlu_format->elf_subid_field.empty()
234,785✔
3247
        && jlu->jlu_format->elf_subid_field == field_name)
234,785✔
3248
    {
3249
        jlu->jlu_subid = frag.to_string();
×
3250
    }
3251
    if (!field_name.empty()
234,785✔
3252
        && jlu->jlu_format->elf_duration_field == field_name)
234,785✔
3253
    {
3254
        auto from_res = humanize::try_from<double>(frag);
×
3255
        if (from_res) {
×
3256
            auto dur_secs
3257
                = from_res->value / jlu->jlu_format->elf_duration_divisor;
×
3258
            jlu->jlu_duration
3259
                = std::max(1us,
×
3260
                           std::chrono::microseconds(
×
3261
                               static_cast<int64_t>(dur_secs * 1000000)));
3262
        }
3263
    }
3264

3265
    if (vd != nullptr && vd->vd_is_desc_field) {
234,785✔
3266
        auto frag_copy = frag.to_owned(jlu->jlu_format->lf_desc_allocator);
87✔
3267

3268
        jlu->jlu_format->lf_desc_captures.emplace(field_name, frag_copy);
87✔
3269
    }
3270

3271
    jlu->add_sub_lines_for(vd, ypc->is_level(1), std::nullopt, str, len, props);
234,785✔
3272

3273
    return 1;
234,785✔
3274
}
3275

3276
static int
3277
rewrite_json_field(yajlpp_parse_context* ypc,
22,361✔
3278
                   const unsigned char* str,
3279
                   size_t len,
3280
                   yajl_string_props_t* props)
3281
{
3282
    static const intern_string_t body_name = intern_string::lookup("body", -1);
22,361✔
3283
    auto* jlu = (json_log_userdata*) ypc->ypc_userdata;
22,361✔
3284
    intern_string_t field_name;
22,361✔
3285
    const auto* vd = jlu->get_field_def(ypc);
22,361✔
3286
    auto frag = string_fragment::from_bytes(str, len);
22,361✔
3287

3288
    if (!ypc->is_level(1) && vd == nullptr) {
22,361✔
3289
        if (!jlu->jlu_format->elf_opid_field.empty()) {
17,312✔
3290
            auto path_sf = ypc->get_path_as_string_fragment();
16,632✔
3291
            if (path_sf.startswith(jlu->jlu_format->elf_opid_field.c_str())) {
16,632✔
3292
                jlu->jlu_opid_hasher.update(path_sf);
26✔
3293
                jlu->jlu_opid_hasher.update(frag);
26✔
3294
            }
3295
        }
3296
        return 1;
17,312✔
3297
    }
3298
    if (vd != nullptr) {
5,049✔
3299
        field_name = vd->vd_meta.lvm_name;
4,703✔
3300
    } else {
3301
        field_name = ypc->get_path();
346✔
3302
    }
3303

3304
    if (jlu->jlu_format->elf_opid_field == field_name) {
5,049✔
3305
        jlu->jlu_format->jlf_line_values.lvv_opid_value = frag.to_string();
337✔
3306
        jlu->jlu_format->jlf_line_values.lvv_opid_provenance
337✔
3307
            = logline_value_vector::opid_provenance::file;
337✔
3308
    }
3309
    if (jlu->jlu_format->elf_thread_id_field == field_name) {
5,049✔
3310
        jlu->jlu_format->jlf_line_values.lvv_thread_id_value
×
3311
            = frag.to_owned(jlu->jlu_format->jlf_line_values.lvv_allocator);
×
3312
    }
3313
    if (jlu->jlu_format->lf_timestamp_field == field_name) {
5,049✔
3314
        char time_buf[64];
3315

3316
        // TODO add a timeval kind to logline_value
3317
        if (jlu->jlu_line->is_time_skewed()
856✔
3318
            || jlu->jlu_line->get_msg_level() == LEVEL_INVALID
840✔
3319
            || (jlu->jlu_format->lf_timestamp_flags
1,696✔
3320
                & (ETF_MICROS_SET | ETF_NANOS_SET | ETF_ZONE_SET)))
832✔
3321
        {
3322
            timeval tv;
3323

3324
            const auto* last = jlu->jlu_format->lf_date_time.scan(
856✔
3325
                (const char*) str,
3326
                len,
3327
                jlu->jlu_format->get_timestamp_formats(),
856✔
3328
                &jlu->jlu_exttm,
3329
                tv);
3330
            if (last == nullptr) {
856✔
3331
                auto ls = jlu->jlu_format->lf_date_time.unlock();
32✔
3332
                if ((last = jlu->jlu_format->lf_date_time.scan(
32✔
3333
                         (const char*) str,
3334
                         len,
3335
                         jlu->jlu_format->get_timestamp_formats(),
32✔
3336
                         &jlu->jlu_exttm,
3337
                         tv))
3338
                    == nullptr)
32✔
3339
                {
3340
                    jlu->jlu_format->lf_date_time.relock(ls);
16✔
3341
                    jlu->jlu_scan_error = log_format::scan_error{
16✔
3342
                        fmt::format(FMT_STRING("failed to parse timestamp "
64✔
3343
                                               "'{}' in string property '{}'"),
3344
                                    frag,
3345
                                    field_name)};
16✔
3346
                }
3347
            }
3348
            if (!jlu->jlu_subline_opts.hash_hack) {
856✔
3349
                if (jlu->jlu_exttm.et_flags & ETF_ZONE_SET
856✔
3350
                    && jlu->jlu_format->lf_date_time.dts_zoned_to_local)
840✔
3351
                {
3352
                    jlu->jlu_exttm.et_flags &= ~ETF_Z_IS_UTC;
840✔
3353
                }
3354
                jlu->jlu_exttm.et_gmtoff
3355
                    = jlu->jlu_format->lf_date_time.dts_local_offset_cache;
856✔
3356
            }
3357
            jlu->jlu_format->lf_date_time.ftime(
856✔
3358
                time_buf,
3359
                sizeof(time_buf),
3360
                jlu->jlu_format->get_timestamp_formats(),
856✔
3361
                jlu->jlu_exttm);
856✔
3362
        } else {
3363
            sql_strftime(
×
3364
                time_buf, sizeof(time_buf), jlu->jlu_line->get_timeval(), 'T');
×
3365
        }
3366
        jlu->jlu_format->jlf_line_values.lvv_time_value = frag;
856✔
3367
        jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
1,712✔
3368
            jlu->jlu_format->get_value_meta(field_name,
1,712✔
3369
                                            value_kind_t::VALUE_TEXT),
3370
            std::string{time_buf});
2,568✔
3371
    } else if (!jlu->jlu_format->lf_start_timestamp_field.empty()
4,193✔
3372
               && jlu->jlu_format->lf_start_timestamp_field == field_name)
4,193✔
3373
    {
3374
        exttm start_tm;
4✔
3375
        timeval start_tv;
3376
        const auto* last = jlu->jlu_format->lf_date_time.scan(
4✔
3377
            (const char*) str,
3378
            len,
3379
            jlu->jlu_format->get_timestamp_formats(),
4✔
3380
            &start_tm,
3381
            start_tv);
3382
        if (last != nullptr) {
4✔
3383
            jlu->jlu_start_time = to_us(start_tv);
4✔
3384
        }
3385
    } else if (vd != nullptr
4,189✔
3386
               && vd->vd_meta.lvm_kind == value_kind_t::VALUE_TIMESTAMP)
3,843✔
3387
    {
3388
        auto dts = jlu->jlu_format->build_time_scanner();
99✔
3389
        exttm tm;
99✔
3390
        timeval tv;
3391

3392
        if (dts.scan((const char*) str,
99✔
3393
                     len,
3394
                     jlu->jlu_format->get_timestamp_formats(),
99✔
3395
                     &tm,
3396
                     tv,
3397
                     true))
3398
        {
3399
            char ts[64];
3400
            tm.et_gmtoff = tm.et_orig_gmtoff;
99✔
3401
            auto tslen = dts.ftime(
99✔
3402
                ts, sizeof(ts), jlu->jlu_format->get_timestamp_formats(), tm);
99✔
3403
            ts[tslen] = '\0';
99✔
3404
            jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
99✔
3405
                jlu->jlu_format->get_value_meta(
198✔
3406
                    ypc, vd, value_kind_t::VALUE_TIMESTAMP),
3407
                std::string{(const char*) ts, tslen});
396✔
3408
        } else {
3409
            jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
×
3410
                jlu->jlu_format->get_value_meta(
×
3411
                    ypc, vd, value_kind_t::VALUE_TEXT),
3412
                std::string{(const char*) str, len});
×
3413
        }
3414
    } else if (jlu->jlu_shared_buffer.contains((const char*) str)) {
4,189✔
3415
        auto str_offset = (int) ((const char*) str - jlu->jlu_line_value);
3,914✔
3416
        if (field_name == jlu->jlu_format->elf_body_field) {
3,914✔
3417
            jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
375✔
3418
                logline_value_meta(body_name,
750✔
3419
                                   value_kind_t::VALUE_TEXT,
3420
                                   logline_value_meta::internal_column{},
×
3421
                                   jlu->jlu_format),
375✔
3422
                string_fragment::from_byte_range(
750✔
3423
                    jlu->jlu_shared_buffer.get_data(),
375✔
3424
                    str_offset,
3425
                    str_offset + len));
375✔
3426
        }
3427

3428
        jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
3,914✔
3429
            jlu->jlu_format->get_value_meta(ypc, vd, value_kind_t::VALUE_TEXT),
7,828✔
3430
            string_fragment::from_byte_range(jlu->jlu_shared_buffer.get_data(),
7,828✔
3431
                                             str_offset,
3432
                                             str_offset + len));
3,914✔
3433
    } else {
3434
        if (field_name == jlu->jlu_format->elf_body_field) {
176✔
3435
            jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
122✔
3436
                logline_value_meta(body_name,
244✔
3437
                                   value_kind_t::VALUE_TEXT,
3438
                                   logline_value_meta::internal_column{},
×
3439
                                   jlu->jlu_format),
122✔
3440
                std::string{(const char*) str, len});
488✔
3441
        }
3442

3443
        jlu->jlu_format->jlf_line_values.lvv_values.emplace_back(
176✔
3444
            jlu->jlu_format->get_value_meta(ypc, vd, value_kind_t::VALUE_TEXT),
352✔
3445
            std::string{(const char*) str, len});
704✔
3446
    }
3447
    if (vd != nullptr && vd->vd_is_desc_field
4,703✔
3448
        && jlu->jlu_format->elf_opid_field.empty())
9,752✔
3449
    {
3450
        auto frag_copy = frag.to_owned(jlu->jlu_format->lf_desc_allocator);
62✔
3451

3452
        jlu->jlu_format->lf_desc_captures.emplace(field_name, frag_copy);
62✔
3453
    }
3454

3455
    return 1;
5,049✔
3456
}
3457

3458
/**
 * Map the cell classification reported by separated_string onto the
 * value kind used for an undeclared column.
 *
 * @param k The kind of cell that was parsed.
 * @return VALUE_NULL for empty cells, VALUE_INTEGER / VALUE_FLOAT for
 * numeric cells and VALUE_TEXT for everything else.
 */
static value_kind_t
csv_cell_kind_to_value_kind(separated_string::cell_kind k)
{
    using cell_kind = separated_string::cell_kind;

    // Text is the fallback for values outside of the enumeration.
    auto retval = value_kind_t::VALUE_TEXT;

    switch (k) {
        case cell_kind::empty:
            retval = value_kind_t::VALUE_NULL;
            break;
        case cell_kind::integer:
            retval = value_kind_t::VALUE_INTEGER;
            break;
        case cell_kind::floating:
            retval = value_kind_t::VALUE_FLOAT;
            break;
        case cell_kind::number_with_suffix:
        case cell_kind::other:
            retval = value_kind_t::VALUE_TEXT;
            break;
    }

    return retval;
}
3474

3475
void
3476
external_log_format::process_csv_cell(logline_value_vector& values,
671✔
3477
                                      string_attrs_t* sa,
3478
                                      const separated_string::iterator& it,
3479
                                      shared_buffer_ref& sbr) const
3480
{
3481
    const auto& [col_name, vd] = this->elf_value_def_read_order[it.index()];
671✔
3482
    auto field_sf = *it;
671✔
3483
    auto lr = to_line_range(field_sf);
671✔
3484
    // separated_string preserves CSV `""` escapes verbatim in the
3485
    // returned fragment; collapse them so renderers, SQL queries,
3486
    // and downstream caps see the canonical text.  No-op when the
3487
    // cell has no embedded quote.
3488
    const bool needs_unescape = field_sf.find('"').has_value();
671✔
3489

3490
    if (vd == nullptr) {
671✔
3491
        auto lvm = this->get_value_meta(intern_string::lookup(col_name),
102✔
3492
                                        csv_cell_kind_to_value_kind(it.kind()))
3493
                       .with_struct_name(LOG_EXTRA_FIELDS_STR);
51✔
3494
        values.lvv_values.emplace_back(lvm, sbr, lr);
51✔
3495
        if (needs_unescape) {
51✔
3496
            values.lvv_values.back().lv_str
2✔
3497
                = separated_string::unescape_quoted(field_sf);
4✔
3498
        }
3499
        return;
51✔
3500
    }
51✔
3501

3502
    values.lvv_values.emplace_back(vd->vd_meta, sbr, lr);
620✔
3503
    if (needs_unescape) {
620✔
3504
        values.lvv_values.back().lv_str
8✔
3505
            = separated_string::unescape_quoted(field_sf);
16✔
3506
    }
3507

3508
    const auto& name = vd->vd_meta.lvm_name;
620✔
3509

3510
    if (sa != nullptr) {
620✔
3511
        if (name == this->lf_timestamp_field) {
436✔
3512
            sa->emplace_back(lr, L_TIMESTAMP.value());
44✔
3513
        } else if (name == this->elf_level_field) {
392✔
3514
            sa->emplace_back(lr, L_LEVEL.value());
44✔
3515
        } else if (name == this->elf_body_field) {
348✔
3516
            sa->emplace_back(lr, SA_BODY.value());
44✔
3517
        }
3518
    }
3519

3520
    auto canon_sf = needs_unescape
3521
        ? unescape_csv_cell(field_sf, values.lvv_allocator)
628✔
3522
        : field_sf;
628✔
3523

3524
    if (name == this->elf_opid_field) {
620✔
3525
        if (!canon_sf.empty() && !canon_sf.is_one_of("-", "--")) {
50✔
3526
            values.lvv_opid_value = canon_sf.to_string();
44✔
3527
            values.lvv_opid_provenance
3528
                = logline_value_vector::opid_provenance::file;
44✔
3529
            if (sa != nullptr) {
44✔
3530
                sa->emplace_back(lr, L_OPID.value());
37✔
3531
            }
3532
        }
3533
    } else if (name == this->elf_duration_field) {
570✔
3534
        if (!canon_sf.empty()) {
44✔
3535
            auto from_res = humanize::try_from<double>(canon_sf);
44✔
3536
            if (from_res) {
44✔
3537
                auto dur_secs = from_res->value / this->elf_duration_divisor;
44✔
3538
                values.lvv_duration_value = std::chrono::microseconds(
88✔
3539
                    static_cast<int64_t>(dur_secs * 1000000));
44✔
3540
                if (sa != nullptr) {
44✔
3541
                    sa->emplace_back(lr, SA_DURATION.value());
44✔
3542
                }
3543
            }
3544
        }
3545
    } else if (name == this->elf_thread_id_field) {
526✔
3546
        if (!canon_sf.empty() && !canon_sf.is_one_of("-", "--")) {
43✔
3547
            values.lvv_thread_id_value
3548
                = canon_sf.to_owned(values.lvv_allocator);
37✔
3549
            if (sa != nullptr) {
37✔
3550
                sa->emplace_back(lr, SA_THREAD_ID.value());
37✔
3551
            }
3552
        }
3553
    } else if (name == this->elf_src_file_field) {
483✔
3554
        if (!canon_sf.empty()) {
43✔
3555
            values.lvv_src_file_value = canon_sf.to_owned(values.lvv_allocator);
43✔
3556
            if (sa != nullptr) {
43✔
3557
                sa->emplace_back(lr, SA_SRC_FILE.value());
43✔
3558
            }
3559
        }
3560
    } else if (name == this->elf_src_line_field) {
440✔
3561
        if (!canon_sf.empty()) {
43✔
3562
            values.lvv_src_line_value = canon_sf.to_owned(values.lvv_allocator);
43✔
3563
            if (sa != nullptr) {
43✔
3564
                sa->emplace_back(lr, SA_SRC_LINE.value());
43✔
3565
            }
3566
        }
3567
    }
3568
}
3569

3570
void
3571
external_log_format::rewrite_tabular_subline(const log_format_file_state& lffs,
58✔
3572
                                             const logline& ll,
3573
                                             shared_buffer_ref& sbr,
3574
                                             subline_options opts)
3575
{
3576
    auto line_frag = sbr.to_string_fragment();
58✔
3577

3578
    this->jlf_share_manager.invalidate_refs();
58✔
3579
    this->jlf_attr_line.clear();
58✔
3580
    this->jlf_line_values.clear();
58✔
3581
    this->jlf_line_offsets.clear();
58✔
3582

3583
    // Parse the row using the locked separator and column order from
3584
    // the header detected in scan_tabular.
3585
    auto ss = separated_string(line_frag);
58✔
3586
    ss.ss_separator = this->tlf_separator;
58✔
3587
    ss.ss_expected_count = this->elf_value_def_read_order.size();
58✔
3588

3589
    for (auto it = ss.begin(); it != ss.end(); ++it) {
528✔
3590
        if (it.index() >= this->elf_value_def_read_order.size()) {
235✔
3591
            break;
×
3592
        }
3593
        this->process_csv_cell(this->jlf_line_values, nullptr, it, sbr);
235✔
3594
        // Hidden values' lv_origin would otherwise point at the
3595
        // original CSV byte range, but lvv_sbr is about to be
3596
        // re-pointed at the rewritten line where those offsets are
3597
        // meaningless.  Invalidate the origin so text_attrs_for_line
3598
        // skips emplacing SA_HIDDEN — apply_hide() would otherwise
3599
        // smear a U+22EE icon over whatever happened to land at
3600
        // those byte positions in the rewritten line.  The value
3601
        // itself still appears in annotate's output for SQL.
3602
        if (this->jlf_line_values.lvv_values.back().lv_meta.is_hidden()) {
235✔
3603
            this->jlf_line_values.lvv_values.back().lv_origin.clear();
58✔
3604
        }
3605
    }
3606

3607
    this->jlf_used_values.assign(this->jlf_line_values.lvv_values.size(),
58✔
3608
                                 false);
58✔
3609
    int sub_offset = this->jlf_line_format_init_count;
58✔
3610

3611
    this->render_line_format(
58✔
3612
        lffs, ll, opts, nullptr, this->jlf_used_values, sub_offset);
58✔
3613

3614
    this->json_append_to_cache("\n", 1);
58✔
3615
    sub_offset += 1;
58✔
3616

3617
    this->emit_detail_block(this->jlf_used_values, sub_offset);
58✔
3618

3619
    this->compute_subline_offsets();
58✔
3620
}
58✔
3621

3622
void
3623
external_log_format::compute_subline_offsets()
1,008✔
3624
{
3625
    this->jlf_line_offsets.push_back(0);
1,008✔
3626
    for (size_t lpc = 0; lpc < this->jlf_attr_line.al_string.size(); lpc++) {
129,989✔
3627
        if (this->jlf_attr_line.al_string[lpc] == '\n') {
128,981✔
3628
            this->jlf_line_offsets.push_back(lpc + 1);
2,256✔
3629
        }
3630
    }
3631
    this->jlf_line_offsets.push_back(this->jlf_attr_line.al_string.size());
1,008✔
3632
}
1,008✔
3633

3634
void
3635
external_log_format::share_rewritten_subline(const logline& ll,
1,969✔
3636
                                             shared_buffer_ref& sbr,
3637
                                             subline_options opts)
3638
{
3639
    off_t this_off = 0, next_off = 0;
1,969✔
3640

3641
    if (!this->jlf_line_offsets.empty()
1,969✔
3642
        && ll.get_sub_offset() < (int) this->jlf_line_offsets.size())
1,969✔
3643
    {
3644
        this_off = this->jlf_line_offsets[ll.get_sub_offset()];
1,969✔
3645
        if ((ll.get_sub_offset() + 1) < (int) this->jlf_line_offsets.size()) {
1,969✔
3646
            next_off = this->jlf_line_offsets[ll.get_sub_offset() + 1];
1,969✔
3647
        } else {
3648
            next_off = this->jlf_attr_line.al_string.size();
×
3649
        }
3650
        if (next_off > 0 && this->jlf_attr_line.al_string[next_off - 1] == '\n'
1,969✔
3651
            && this_off != next_off)
3,938✔
3652
        {
3653
            next_off -= 1;
1,969✔
3654
        }
3655
    }
3656

3657
    if (opts.full_message) {
1,969✔
3658
        sbr.share(this->jlf_share_manager,
738✔
3659
                  this->jlf_attr_line.al_string.data(),
369✔
3660
                  this->jlf_attr_line.al_string.size());
3661
    } else {
3662
        sbr.share(this->jlf_share_manager,
3,200✔
3663
                  this->jlf_attr_line.al_string.data() + this_off,
1,600✔
3664
                  next_off - this_off);
1,600✔
3665
    }
3666
    sbr.get_metadata().m_valid_utf = ll.is_valid_utf();
1,969✔
3667
    sbr.get_metadata().m_has_ansi = ll.has_ansi();
1,969✔
3668
    this->jlf_cached_sub_range.lr_start = this_off;
1,969✔
3669
    this->jlf_cached_sub_range.lr_end = next_off;
1,969✔
3670
    this->jlf_line_values.lvv_sbr = sbr.clone();
1,969✔
3671
}
1,969✔
3672

3673
void
3674
external_log_format::apply_text_transform(
9,469✔
3675
    size_t begin_size, json_format_element::transform_t transform)
3676
{
3677
    auto& s = this->jlf_attr_line.al_string;
9,469✔
3678
    switch (transform) {
9,469✔
3679
        case json_format_element::transform_t::NONE:
9,407✔
3680
            break;
9,407✔
3681
        case json_format_element::transform_t::UPPERCASE:
62✔
3682
            for (size_t cindex = begin_size; cindex < s.size(); cindex++) {
486✔
3683
                s[cindex] = toupper(s[cindex]);
424✔
3684
            }
3685
            break;
62✔
3686
        case json_format_element::transform_t::LOWERCASE:
×
3687
            for (size_t cindex = begin_size; cindex < s.size(); cindex++) {
×
3688
                s[cindex] = tolower(s[cindex]);
×
3689
            }
3690
            break;
×
3691
        case json_format_element::transform_t::CAPITALIZE:
×
3692
            if (begin_size < s.size()) {
×
3693
                s[begin_size] = toupper(s[begin_size]);
×
3694
            }
3695
            for (size_t cindex = begin_size + 1; cindex < s.size(); cindex++) {
×
3696
                s[cindex] = tolower(s[cindex]);
×
3697
            }
3698
            break;
×
3699
    }
3700
}
9,469✔
3701

3702
bool
3703
external_log_format::emit_overflow(const log_format_file_state& lffs,
3,570✔
3704
                                   const json_format_element& jfe,
3705
                                   const value_def* vd,
3706
                                   std::string& str)
3707
{
3708
    if ((int) str.size() <= jfe.jfe_max_width) {
3,570✔
3709
        return false;
3,467✔
3710
    }
3711
    switch (jfe.jfe_overflow) {
103✔
3712
        case json_format_element::overflow_t::ABBREV: {
72✔
3713
            size_t new_size
3714
                = abbreviate_str(&str[0], str.size(), jfe.jfe_max_width);
72✔
3715
            str.resize(new_size);
72✔
3716
            this->json_append(lffs, jfe, vd, str);
72✔
3717
            break;
72✔
3718
        }
3719
        case json_format_element::overflow_t::TRUNCATE:
27✔
3720
            this->json_append_to_cache(str.c_str(), jfe.jfe_max_width);
27✔
3721
            break;
27✔
3722
        case json_format_element::overflow_t::DOTDOT: {
4✔
3723
            size_t middle = (jfe.jfe_max_width / 2) - 1;
4✔
3724
            this->json_append_to_cache(str.c_str(), middle);
4✔
3725
            this->json_append_to_cache("..", 2);
4✔
3726
            size_t rest = (jfe.jfe_max_width - middle - 2);
4✔
3727
            this->json_append_to_cache(str.c_str() + str.size() - rest, rest);
4✔
3728
            break;
4✔
3729
        }
3730
        case json_format_element::overflow_t::LASTWORD: {
×
3731
            size_t new_size
3732
                = last_word_str(&str[0], str.size(), jfe.jfe_max_width);
×
3733
            str.resize(new_size);
×
3734
            this->json_append(lffs, jfe, vd, str);
×
3735
            break;
×
3736
        }
3737
    }
3738
    return true;
103✔
3739
}
3740

3741
void
3742
external_log_format::render_line_format(const log_format_file_state& lffs,
984✔
3743
                                        const logline& ll,
3744
                                        subline_options opts,
3745
                                        const exttm* ts_extra_bits,
3746
                                        std::vector<bool>& used_values,
3747
                                        int& sub_offset)
3748
{
3749
    static const intern_string_t ts_field
3750
        = intern_string::lookup("__timestamp__", -1);
984✔
3751
    static const intern_string_t level_field
3752
        = intern_string::lookup("__level__");
1,116✔
3753
    static const intern_string_t duration_field
3754
        = intern_string::lookup("__duration__");
1,116✔
3755

3756
    line_range lr;
984✔
3757

3758
    for (const auto& jfe : this->jlf_line_format) {
12,277✔
3759
        size_t begin_size = this->jlf_attr_line.al_string.size();
11,293✔
3760

3761
        switch (jfe.jfe_type) {
11,293✔
3762
            case json_log_field::CONSTANT:
1,824✔
3763
                this->json_append_to_cache(jfe.jfe_default_value.c_str(),
1,824✔
3764
                                           jfe.jfe_default_value.size());
1,824✔
3765
                break;
1,824✔
3766
            case json_log_field::VARIABLE: {
9,469✔
3767
                auto lv_iter = std::find_if(
9,469✔
3768
                    this->jlf_line_values.lvv_values.begin(),
3769
                    this->jlf_line_values.lvv_values.end(),
3770
                    logline_value_name_cmp(&jfe.jfe_value.pp_value));
3771
                if (lv_iter != this->jlf_line_values.lvv_values.end()) {
9,469✔
3772
                    auto str = lv_iter->to_string();
3,570✔
3773
                    value_def* vd = nullptr;
3,570✔
3774
                    if (lv_iter->lv_meta.lvm_values_index) {
3,570✔
3775
                        vd = this->elf_value_def_order
3776
                                 [lv_iter->lv_meta.lvm_values_index.value()]
3,570✔
3777
                                     .get();
3,570✔
3778
                    }
3779
                    while (endswith(str, "\n")) {
3,654✔
3780
                        str.pop_back();
84✔
3781
                    }
3782
                    size_t nl_pos = str.find('\n');
3,570✔
3783

3784
                    if (!jfe.jfe_prefix.empty()) {
3,570✔
3785
                        this->json_append_to_cache(jfe.jfe_prefix);
1,827✔
3786
                    }
3787
                    lr.lr_start = this->jlf_attr_line.al_string.size();
3,570✔
3788

3789
                    if (!this->emit_overflow(lffs, jfe, vd, str)) {
3,570✔
3790
                        sub_offset += std::count(str.begin(), str.end(), '\n');
3,467✔
3791
                        if (vd != nullptr
3,467✔
3792
                            && vd->vd_meta.lvm_kind == value_kind_t::VALUE_JSON)
3,467✔
3793
                        {
3794
                            auto json_al = attr_line_t();
12✔
3795
                            json_al.append(str);
12✔
3796
                            highlight_syntax(
12✔
3797
                                text_format_t::TF_JSON, json_al, std::nullopt);
3798
                            this->jlf_attr_line.append(json_al);
12✔
3799
                        } else {
12✔
3800
                            this->json_append(lffs, jfe, vd, str);
3,455✔
3801
                        }
3802
                    }
3803

3804
                    if (nl_pos == std::string::npos || opts.full_message) {
3,570✔
3805
                        lr.lr_end = this->jlf_attr_line.al_string.size();
3,565✔
3806
                    } else {
3807
                        lr.lr_end = lr.lr_start + nl_pos;
5✔
3808
                    }
3809

3810
                    const auto& name = lv_iter->lv_meta.lvm_name;
3,570✔
3811
                    if (name == this->lf_timestamp_field) {
3,570✔
3812
                        this->jlf_attr_line.al_attrs.emplace_back(
390✔
3813
                            lr, L_TIMESTAMP.value());
780✔
3814
                    } else if (name == this->elf_body_field) {
3,180✔
3815
                        this->jlf_attr_line.al_attrs.emplace_back(
515✔
3816
                            lr, SA_BODY.value());
1,030✔
3817
                    } else if (name == this->elf_src_loc_field) {
2,665✔
3818
                        size_t digits = 0;
×
3819
                        for (auto str_iter = str.rbegin();
×
3820
                             str_iter != str.rend();
×
3821
                             ++str_iter)
×
3822
                        {
3823
                            if (isdigit(*str_iter)) {
×
3824
                                digits += 1;
×
3825
                            } else {
3826
                                break;
×
3827
                            }
3828
                        }
3829
                        auto diff = str.size() - digits;
×
3830
                        auto file_lr = lr;
×
3831
                        file_lr.lr_end -= digits;
×
3832
                        auto line_lr = lr;
×
3833
                        line_lr.lr_start += diff;
×
3834
                        if (digits > 0) {
×
3835
                            file_lr.lr_end -= 1;
×
3836
                        }
3837
                        this->jlf_attr_line.al_attrs.emplace_back(
×
3838
                            lr, SA_SRC_LOC.value());
×
3839
                        this->jlf_attr_line.al_attrs.emplace_back(
×
3840
                            file_lr, SA_SRC_FILE.value());
×
3841
                        this->jlf_line_values.lvv_src_file_value
3842
                            = this->jlf_attr_line
3843
                                  .to_string_fragment(
×
3844
                                      this->jlf_attr_line.al_attrs.back())
×
3845
                                  .to_owned(
×
3846
                                      this->jlf_line_values.lvv_allocator);
×
3847
                        this->jlf_attr_line.al_attrs.emplace_back(
×
3848
                            line_lr, SA_SRC_LINE.value());
×
3849
                        this->jlf_line_values.lvv_src_line_value
3850
                            = this->jlf_attr_line
3851
                                  .to_string_fragment(
×
3852
                                      this->jlf_attr_line.al_attrs.back())
×
3853
                                  .to_owned(
×
3854
                                      this->jlf_line_values.lvv_allocator);
×
3855
                    } else if (name == this->elf_src_file_field) {
2,665✔
3856
                        this->jlf_attr_line.al_attrs.emplace_back(
2✔
3857
                            lr, SA_SRC_FILE.value());
4✔
3858
                        this->jlf_line_values.lvv_src_file_value
3859
                            = this->jlf_attr_line
3860
                                  .to_string_fragment(
2✔
3861
                                      this->jlf_attr_line.al_attrs.back())
2✔
3862
                                  .to_owned(
4✔
3863
                                      this->jlf_line_values.lvv_allocator);
4✔
3864
                    } else if (name == this->elf_src_line_field) {
2,663✔
3865
                        this->jlf_attr_line.al_attrs.emplace_back(
2✔
3866
                            lr, SA_SRC_LINE.value());
4✔
3867
                        this->jlf_line_values.lvv_src_line_value
3868
                            = this->jlf_attr_line
3869
                                  .to_string_fragment(
2✔
3870
                                      this->jlf_attr_line.al_attrs.back())
2✔
3871
                                  .to_owned(
4✔
3872
                                      this->jlf_line_values.lvv_allocator);
4✔
3873
                    } else if (name == this->elf_thread_id_field) {
2,661✔
3874
                        this->jlf_attr_line.al_attrs.emplace_back(
43✔
3875
                            lr, SA_THREAD_ID.value());
86✔
3876
                    } else if (name == this->elf_duration_field) {
2,618✔
3877
                        this->jlf_attr_line.al_attrs.emplace_back(
×
3878
                            lr, SA_DURATION.value());
×
3879
                    } else if (name == this->elf_level_field) {
2,618✔
3880
                        this->jlf_attr_line.al_attrs.emplace_back(
439✔
3881
                            lr, L_LEVEL.value());
878✔
3882
                    } else if (name == this->elf_opid_field && !lr.empty()) {
2,179✔
3883
                        this->jlf_attr_line.al_attrs.emplace_back(
337✔
3884
                            lr, L_OPID.value());
674✔
3885
                    }
3886
                    lv_iter->lv_origin = lr;
3,570✔
3887
                    lv_iter->lv_sub_offset = sub_offset;
3,570✔
3888
                    used_values[std::distance(
3,570✔
3889
                        this->jlf_line_values.lvv_values.begin(), lv_iter)]
3,570✔
3890
                        = true;
3,570✔
3891

3892
                    if (!jfe.jfe_suffix.empty()) {
3,570✔
3893
                        this->json_append_to_cache(jfe.jfe_suffix);
484✔
3894
                    }
3895
                } else if (jfe.jfe_value.pp_value == ts_field) {
9,469✔
3896
                    char ts[64];
3897
                    exttm et;
641✔
3898

3899
                    ll.to_exttm(et);
641✔
3900
                    et.et_flags |= this->lf_timestamp_flags;
641✔
3901
                    if (ts_extra_bits != nullptr) {
641✔
3902
                        et.et_nsec += ts_extra_bits->et_nsec % 1000;
583✔
3903
                        et.et_gmtoff = ts_extra_bits->et_gmtoff;
583✔
3904
                        et.et_flags |= ts_extra_bits->et_flags;
583✔
3905
                    }
3906
                    if (!jfe.jfe_prefix.empty()) {
641✔
3907
                        this->json_append_to_cache(jfe.jfe_prefix);
7✔
3908
                    }
3909
                    ssize_t ts_len;
3910
                    if (jfe.jfe_ts_format.empty()) {
641✔
3911
                        ts_len = this->lf_date_time.ftime(
588✔
3912
                            ts, sizeof(ts), this->get_timestamp_formats(), et);
3913
                    } else {
3914
                        ts_len = ftime_fmt(
53✔
3915
                            ts, sizeof(ts), jfe.jfe_ts_format.c_str(), et);
3916
                    }
3917
                    lr.lr_start = this->jlf_attr_line.al_string.size();
641✔
3918
                    this->json_append_to_cache(ts, ts_len);
641✔
3919
                    lr.lr_end = this->jlf_attr_line.al_string.size();
641✔
3920
                    this->jlf_attr_line.al_attrs.emplace_back(
641✔
3921
                        lr, L_TIMESTAMP.value());
1,282✔
3922
                    if (!jfe.jfe_suffix.empty()) {
641✔
3923
                        this->json_append_to_cache(jfe.jfe_suffix);
7✔
3924
                    }
3925
                    auto ts_iter = std::find_if(
641✔
3926
                        this->jlf_line_values.lvv_values.begin(),
3927
                        this->jlf_line_values.lvv_values.end(),
3928
                        logline_value_name_cmp(&this->lf_timestamp_field));
641✔
3929
                    if (ts_iter != this->jlf_line_values.lvv_values.end()) {
641✔
3930
                        used_values[std::distance(
641✔
3931
                            this->jlf_line_values.lvv_values.begin(), ts_iter)]
1,282✔
3932
                            = true;
641✔
3933
                    }
3934
                } else if (jfe.jfe_value.pp_value == level_field
5,258✔
3935
                           || jfe.jfe_value.pp_value == this->elf_level_field)
5,258✔
3936
                {
3937
                    auto level_name = ll.get_level_name();
62✔
3938
                    if (!jfe.jfe_prefix.empty()) {
62✔
3939
                        this->json_append_to_cache(jfe.jfe_prefix);
×
3940
                    }
3941
                    lr.lr_start = this->jlf_attr_line.al_string.size();
62✔
3942
                    this->json_append(lffs, jfe, nullptr, level_name);
62✔
3943
                    if (jfe.jfe_auto_width
124✔
3944
                        && level_name.length() < MAX_LEVEL_NAME_LEN)
62✔
3945
                    {
3946
                        this->json_append_to_cache(MAX_LEVEL_NAME_LEN
43✔
3947
                                                   - level_name.length());
43✔
3948
                    }
3949
                    lr.lr_end = this->jlf_attr_line.al_string.size();
62✔
3950
                    this->jlf_attr_line.al_attrs.emplace_back(lr,
62✔
3951
                                                              L_LEVEL.value());
124✔
3952
                    if (!jfe.jfe_suffix.empty()) {
62✔
3953
                        this->json_append_to_cache(jfe.jfe_suffix);
×
3954
                    }
3955
                    if (jfe.jfe_value.pp_value == this->elf_level_field) {
62✔
3956
                        auto lvl_iter = std::find_if(
×
3957
                            this->jlf_line_values.lvv_values.begin(),
3958
                            this->jlf_line_values.lvv_values.end(),
3959
                            logline_value_name_cmp(&this->elf_level_field));
×
3960
                        if (lvl_iter != this->jlf_line_values.lvv_values.end())
×
3961
                        {
3962
                            used_values[std::distance(
×
3963
                                this->jlf_line_values.lvv_values.begin(),
3964
                                lvl_iter)] = true;
×
3965
                        }
3966
                    }
3967
                } else if (jfe.jfe_value.pp_value == duration_field) {
5,196✔
3968
                    if (this->jlf_line_values.lvv_duration_value) {
10✔
3969
                        if (!jfe.jfe_prefix.empty()) {
2✔
3970
                            this->json_append_to_cache(jfe.jfe_prefix);
2✔
3971
                        }
3972
                        lr.lr_start = this->jlf_attr_line.al_string.size();
2✔
3973
                        auto dur_str
3974
                            = humanize::time::duration::from_tv(
2✔
3975
                                  to_timeval(this->jlf_line_values
×
3976
                                                 .lvv_duration_value.value()))
2✔
3977
                                  .with_compact(false)
2✔
3978
                                  .to_string();
2✔
3979
                        this->json_append(lffs, jfe, nullptr, dur_str);
2✔
3980
                        lr.lr_end = this->jlf_attr_line.al_string.size();
2✔
3981
                        this->jlf_attr_line.al_attrs.emplace_back(
2✔
3982
                            lr, SA_DURATION.value());
4✔
3983
                        if (!jfe.jfe_suffix.empty()) {
2✔
3984
                            this->json_append_to_cache(jfe.jfe_suffix);
×
3985
                        }
3986
                    } else if (!jfe.jfe_default_value.empty()) {
10✔
3987
                        if (!jfe.jfe_prefix.empty()) {
×
3988
                            this->json_append_to_cache(jfe.jfe_prefix);
×
3989
                        }
3990
                        this->json_append(
×
3991
                            lffs, jfe, nullptr, jfe.jfe_default_value);
×
3992
                        if (!jfe.jfe_suffix.empty()) {
×
3993
                            this->json_append_to_cache(jfe.jfe_suffix);
×
3994
                        }
3995
                    }
3996
                    if (!this->elf_duration_field.empty()) {
10✔
3997
                        auto dur_iter = std::find_if(
10✔
3998
                            this->jlf_line_values.lvv_values.begin(),
3999
                            this->jlf_line_values.lvv_values.end(),
4000
                            logline_value_name_cmp(&this->elf_duration_field));
10✔
4001
                        if (dur_iter != this->jlf_line_values.lvv_values.end())
10✔
4002
                        {
4003
                            used_values[std::distance(
2✔
4004
                                this->jlf_line_values.lvv_values.begin(),
4005
                                dur_iter)] = true;
2✔
4006
                        }
4007
                    }
4008
                } else if (!jfe.jfe_default_value.empty()) {
5,186✔
4009
                    if (!jfe.jfe_prefix.empty()) {
80✔
4010
                        this->json_append_to_cache(jfe.jfe_prefix);
×
4011
                    }
4012
                    this->json_append(
80✔
4013
                        lffs, jfe, nullptr, jfe.jfe_default_value);
80✔
4014
                    if (!jfe.jfe_suffix.empty()) {
80✔
4015
                        this->json_append_to_cache(jfe.jfe_suffix);
×
4016
                    }
4017
                }
4018

4019
                this->apply_text_transform(begin_size, jfe.jfe_text_transform);
9,469✔
4020
                break;
9,469✔
4021
            }
4022
            default:
×
4023
                break;
×
4024
        }
4025
    }
4026
}
984✔
4027

4028
void
4029
external_log_format::emit_detail_block(const std::vector<bool>& used_values,
984✔
4030
                                       int& sub_offset)
4031
{
4032
    static const intern_string_t body_name = intern_string::lookup("body", -1);
984✔
4033

4034
    for (size_t lpc = 0; lpc < this->jlf_line_values.lvv_values.size(); lpc++) {
9,044✔
4035
        auto& lv = this->jlf_line_values.lvv_values[lpc];
8,060✔
4036

4037
        if (lv.lv_meta.is_hidden() || used_values[lpc]
12,446✔
4038
            || body_name == lv.lv_meta.lvm_name)
12,446✔
4039
        {
4040
            continue;
7,298✔
4041
        }
4042

4043
        auto str = lv.to_string();
762✔
4044
        while (endswith(str, "\n")) {
776✔
4045
            str.pop_back();
14✔
4046
        }
4047

4048
        lv.lv_sub_offset = sub_offset;
762✔
4049
        lv.lv_origin.lr_start = this->jlf_attr_line.al_string.size() + 2
762✔
4050
            + lv.lv_meta.lvm_name.size() + 2;
762✔
4051
        auto frag = string_fragment::from_str(str);
762✔
4052
        line_range lr;
762✔
4053
        while (true) {
4054
            auto utf_scan_res = is_utf8(frag, '\n');
810✔
4055

4056
            this->json_append_to_cache("  ", 2);
810✔
4057
            this->json_append_to_cache(
810✔
4058
                lv.lv_meta.lvm_name.to_string_fragment());
810✔
4059
            this->json_append_to_cache(": ", 2);
810✔
4060
            lr.lr_start = this->jlf_attr_line.al_string.size();
810✔
4061
            this->json_append_to_cache(utf_scan_res.usr_valid_frag);
810✔
4062
            lr.lr_end = this->jlf_attr_line.al_string.size();
810✔
4063
            if (lv.lv_meta.lvm_name == this->elf_body_field) {
810✔
4064
                this->jlf_attr_line.al_attrs.emplace_back(lr, SA_BODY.value());
×
4065
            } else {
4066
                this->jlf_attr_line.al_attrs.emplace_back(
810✔
4067
                    lr, SA_EXTRA_CONTENT.value());
1,620✔
4068
            }
4069
            this->json_append_to_cache("\n", 1);
810✔
4070
            sub_offset += 1;
810✔
4071
            if (utf_scan_res.usr_remaining) {
810✔
4072
                frag = utf_scan_res.usr_remaining.value();
48✔
4073
            } else {
4074
                break;
762✔
4075
            }
4076
        }
48✔
4077
        lv.lv_origin.lr_end = this->jlf_attr_line.al_string.size() - 1;
762✔
4078
        if (lv.lv_meta.lvm_name == this->elf_opid_field
762✔
4079
            && !lv.lv_origin.empty())
762✔
4080
        {
4081
            this->jlf_attr_line.al_attrs.emplace_back(lv.lv_origin,
7✔
4082
                                                      L_OPID.value());
14✔
4083
        }
4084
    }
762✔
4085
}
984✔
4086

4087
void
4088
external_log_format::get_subline(const log_format_file_state& lffs,
10,761✔
4089
                                 const logline& ll,
4090
                                 shared_buffer_ref& sbr,
4091
                                 subline_options opts)
4092
{
4093
    if (this->elf_type == elf_type_t::ELF_TYPE_TEXT) {
10,761✔
4094
        return;
8,634✔
4095
    }
4096

4097
    if (this->elf_type == elf_type_t::ELF_TYPE_TABULAR) {
2,127✔
4098
        // Without a line-format, leave the raw row in place — formats
4099
        // that haven't opted into rewriting keep their pre-existing
4100
        // display.
4101
        if (this->jlf_line_format.empty()) {
229✔
4102
            return;
125✔
4103
        }
4104
        if (this->jlf_cached_offset != ll.get_offset()
104✔
4105
            || this->jlf_cached_opts != opts)
104✔
4106
        {
4107
            this->rewrite_tabular_subline(lffs, ll, sbr, opts);
58✔
4108
            this->jlf_cached_offset = ll.get_offset();
58✔
4109
            this->jlf_cached_opts = opts;
58✔
4110
        }
4111
        this->share_rewritten_subline(ll, sbr, opts);
104✔
4112
        return;
104✔
4113
    }
4114

4115
    if (this->jlf_cached_offset != ll.get_offset()
1,898✔
4116
        || this->jlf_cached_opts != opts)
1,898✔
4117
    {
4118
        auto& ypc = *(this->jlf_parse_context);
983✔
4119
        yajl_handle handle = this->jlf_yajl_handle.get();
983✔
4120
        json_log_userdata jlu(sbr, nullptr);
983✔
4121

4122
        jlu.jlu_subline_opts = opts;
983✔
4123

4124
        this->lf_desc_captures.clear();
983✔
4125
        this->lf_desc_allocator.reset();
983✔
4126
        this->jlf_share_manager.invalidate_refs();
983✔
4127
        this->jlf_attr_line.clear();
983✔
4128
        this->jlf_line_values.clear();
983✔
4129
        this->jlf_line_offsets.clear();
983✔
4130

4131
        auto line_frag = sbr.to_string_fragment();
983✔
4132

4133
        if (!line_frag.startswith("{")) {
983✔
4134
            this->jlf_attr_line.al_string.assign(line_frag.data(),
33✔
4135
                                                 line_frag.length());
33✔
4136
            this->jlf_line_values.clear();
33✔
4137
            sbr.share(this->jlf_share_manager,
66✔
4138
                      this->jlf_attr_line.al_string.data(),
33✔
4139
                      this->jlf_attr_line.al_string.size());
4140
            this->jlf_line_values.lvv_sbr = sbr.clone();
33✔
4141
            this->jlf_attr_line.al_attrs.emplace_back(
33✔
4142
                line_range{0, -1},
×
4143
                SA_INVALID.value(fmt::format(
66✔
4144
                    FMT_STRING("line at offset {} is not a JSON-line"),
66✔
4145
                    ll.get_offset())));
33✔
4146
            return;
33✔
4147
        }
4148

4149
        yajl_reset(handle);
950✔
4150
        ypc.set_static_handler(json_log_rewrite_handlers.jpc_children[0]);
950✔
4151
        ypc.ypc_userdata = &jlu;
950✔
4152
        ypc.ypc_ignore_unused = true;
950✔
4153
        ypc.ypc_alt_callbacks.yajl_start_array = json_array_start_const;
950✔
4154
        ypc.ypc_alt_callbacks.yajl_end_array = json_array_end;
950✔
4155
        ypc.ypc_alt_callbacks.yajl_start_map = json_array_start_const;
950✔
4156
        ypc.ypc_alt_callbacks.yajl_end_map = json_array_end;
950✔
4157
        jlu.jlu_format = this;
950✔
4158
        jlu.jlu_line = &ll;
950✔
4159
        jlu.jlu_handle = handle;
950✔
4160
        jlu.jlu_line_value = sbr.get_data();
950✔
4161
        jlu.jlu_format_hits.resize(this->jlf_line_format.size());
950✔
4162

4163
        yajl_status parse_status = yajl_parse(
1,900✔
4164
            handle, (const unsigned char*) sbr.get_data(), sbr.length());
950✔
4165
        if (parse_status != yajl_status_ok
950✔
4166
            || yajl_complete_parse(handle) != yajl_status_ok
950✔
4167
            || jlu.jlu_scan_error)
1,900✔
4168
        {
4169
            unsigned char* msg;
4170
            std::string full_msg;
24✔
4171

4172
            msg = yajl_get_error(
48✔
4173
                handle, 1, (const unsigned char*) sbr.get_data(), sbr.length());
24✔
4174
            if (msg != nullptr) {
24✔
4175
                full_msg = fmt::format(
24✔
4176
                    FMT_STRING("[offset: {}] {}\n{}"),
48✔
4177
                    ll.get_offset(),
24✔
4178
                    fmt::string_view{sbr.get_data(), sbr.length()},
24✔
4179
                    reinterpret_cast<char*>(msg));
48✔
4180
                yajl_free_error(handle, msg);
24✔
4181
            }
4182

4183
            this->jlf_attr_line.al_string.assign(full_msg.data(),
24✔
4184
                                                 full_msg.size());
4185
            this->jlf_line_values.clear();
24✔
4186
            this->jlf_attr_line.al_attrs.emplace_back(
24✔
4187
                line_range{0, -1},
×
4188
                SA_INVALID.value(jlu.jlu_scan_error
72✔
4189
                                     ? jlu.jlu_scan_error->se_message
56✔
4190
                                     : "JSON line failed to parse"));
4191
        } else {
24✔
4192
            this->jlf_used_values.assign(
926✔
4193
                this->jlf_line_values.lvv_values.size(), false);
926✔
4194
            for (auto lv_iter = this->jlf_line_values.lvv_values.begin();
926✔
4195
                 lv_iter != this->jlf_line_values.lvv_values.end();
8,751✔
4196
                 ++lv_iter)
7,825✔
4197
            {
4198
                lv_iter->lv_meta.lvm_format = this;
7,825✔
4199
            }
4200
            if (!this->jlf_line_values.lvv_time_value) {
926✔
4201
                this->jlf_line_values.lvv_time_exttm = jlu.jlu_exttm;
94✔
4202
            }
4203

4204
            if (jlu.jlu_tid_number) {
926✔
4205
                char tid_buf[128];
4206
                auto to_n_res = fmt::format_to_n(tid_buf,
71✔
4207
                                                 sizeof(tid_buf) - 1,
4208
                                                 FMT_STRING("{}"),
213✔
4209
                                                 jlu.jlu_tid_number.value());
4210
                *to_n_res.out = '\0';
71✔
4211
                this->jlf_line_values.lvv_thread_id_value
4212
                    = string_fragment::from_c_str(tid_buf).to_owned(
213✔
4213
                        this->jlf_line_values.lvv_allocator);
142✔
4214
            } else if (jlu.jlu_tid_frag) {
855✔
4215
                this->jlf_line_values.lvv_thread_id_value
4216
                    = jlu.jlu_tid_frag->to_owned(
×
4217
                        this->jlf_line_values.lvv_allocator);
×
4218
            }
4219

4220
            if (jlu.jlu_start_time && jlu.jlu_end_time && !jlu.jlu_duration) {
926✔
4221
                if (jlu.jlu_end_time.value() > jlu.jlu_start_time.value()) {
3✔
4222
                    jlu.jlu_duration
4223
                        = jlu.jlu_end_time.value() - jlu.jlu_start_time.value();
3✔
4224
                }
4225
            }
4226

4227
            auto use_opid_hasher = false;
926✔
4228
            if (this->elf_opid_field.empty()
926✔
4229
                && this->lf_opid_source.value_or(
567✔
4230
                       opid_source_t::from_description)
567✔
4231
                    == opid_source_t::from_description
4232
                && this->lf_opid_description_def->size() == 1)
1,493✔
4233
            {
4234
                const auto& od = this->lf_opid_description_def->begin()->second;
90✔
4235
                for (const auto& desc : *od.od_descriptors) {
270✔
4236
                    auto desc_iter
4237
                        = this->lf_desc_captures.find(desc.od_field.pp_value);
180✔
4238
                    if (desc_iter == this->lf_desc_captures.end()) {
180✔
4239
                        continue;
120✔
4240
                    }
4241
                    jlu.jlu_opid_hasher.update(desc_iter->second);
60✔
4242
                    use_opid_hasher = true;
60✔
4243
                }
4244
            } else if (!jlu.jlu_opid_desc_frag && !jlu.jlu_opid_frag
1,657✔
4245
                       && jlu.jlu_duration)
1,657✔
4246
            {
4247
                jlu.jlu_opid_hasher.update(line_frag);
2✔
4248
                use_opid_hasher = true;
2✔
4249
            } else if (jlu.jlu_opid_frag) {
834✔
4250
                this->jlf_line_values.lvv_opid_value
4251
                    = jlu.jlu_opid_frag->to_string();
×
4252
                this->jlf_line_values.lvv_opid_provenance
4253
                    = logline_value_vector::opid_provenance::file;
×
4254
            } else if (jlu.jlu_opid_desc_frag) {
834✔
4255
                use_opid_hasher = true;
15✔
4256
            }
4257
            if (use_opid_hasher) {
926✔
4258
                this->jlf_line_values.lvv_opid_value
4259
                    = jlu.jlu_opid_hasher.to_string();
47✔
4260
                this->jlf_line_values.lvv_opid_provenance
4261
                    = logline_value_vector::opid_provenance::file;
47✔
4262
            }
4263
            this->jlf_line_values.lvv_duration_value = jlu.jlu_duration;
926✔
4264

4265
            int sub_offset = this->jlf_line_format_init_count;
926✔
4266

4267
            this->render_line_format(lffs,
926✔
4268
                                     ll,
4269
                                     opts,
4270
                                     &jlu.jlu_exttm,
4271
                                     this->jlf_used_values,
926✔
4272
                                     sub_offset);
4273

4274
            this->json_append_to_cache("\n", 1);
926✔
4275
            sub_offset += 1;
926✔
4276

4277
            this->emit_detail_block(this->jlf_used_values, sub_offset);
926✔
4278
        }
4279

4280
        this->compute_subline_offsets();
950✔
4281
        this->jlf_cached_offset = ll.get_offset();
950✔
4282
        this->jlf_cached_opts = opts;
950✔
4283
    }
983✔
4284

4285
    this->share_rewritten_subline(ll, sbr, opts);
1,865✔
4286
}
4287

4288
struct compiled_header_expr {
4289
    auto_mem<sqlite3_stmt> che_stmt{sqlite3_finalize};
4290
    bool che_enabled{true};
4291
};
4292

4293
struct format_header_expressions : public lnav_config_listener {
4294
    format_header_expressions() : lnav_config_listener(__FILE__) {}
1,354✔
4295

4296
    auto_sqlite3 e_db;
4297
    std::map<intern_string_t, std::map<std::string, compiled_header_expr>>
4298
        e_header_exprs;
4299
};
4300

4301
// The header-expression state wrapped in safe::Safe; detect_mime_type()
// goes through a safe::WriteAccess before touching the contents.
using safe_format_header_expressions = safe::Safe<format_header_expressions>;
4302

4303
// File-local instance of the header-expression state.
static safe_format_header_expressions format_header_exprs;
4304

4305
std::optional<external_file_format>
4306
detect_mime_type(const std::filesystem::path& filename)
816✔
4307
{
4308
    uint8_t buffer[1024];
4309
    size_t buffer_size = 0;
816✔
4310

4311
    {
4312
        auto_fd fd;
816✔
4313

4314
        if ((fd = lnav::filesystem::openp(filename, O_RDONLY)) == -1) {
816✔
4315
            return std::nullopt;
×
4316
        }
4317

4318
        ssize_t rc;
4319

4320
        if ((rc = read(fd, buffer, sizeof(buffer))) == -1) {
816✔
4321
            return std::nullopt;
×
4322
        }
4323
        buffer_size = rc;
816✔
4324
    }
816✔
4325

4326
    auto hexbuf = auto_buffer::alloc(buffer_size * 2);
816✔
4327

4328
    for (size_t lpc = 0; lpc < buffer_size; lpc++) {
409,542✔
4329
        fmt::format_to(
408,726✔
4330
            std::back_inserter(hexbuf), FMT_STRING("{:02x}"), buffer[lpc]);
1,634,904✔
4331
    }
4332

4333
    safe::WriteAccess<safe_format_header_expressions> in(format_header_exprs);
816✔
4334

4335
    for (const auto& format : log_format::get_root_formats()) {
66,134✔
4336
        auto elf = std::dynamic_pointer_cast<external_log_format>(format);
65,318✔
4337
        if (elf == nullptr) {
65,318✔
4338
            continue;
4,896✔
4339
        }
4340

4341
        if (elf->elf_converter.c_header.h_exprs.he_exprs.empty()) {
60,422✔
4342
            continue;
58,790✔
4343
        }
4344

4345
        if (buffer_size < elf->elf_converter.c_header.h_size) {
1,632✔
4346
            log_debug(
126✔
4347
                "%s: file content too small (%zu) for header detection: %s",
4348
                filename.c_str(),
4349
                buffer_size,
4350
                elf->get_name().get());
4351
            continue;
126✔
4352
        }
4353
        for (const auto& hpair : elf->elf_converter.c_header.h_exprs.he_exprs) {
5,391✔
4354
            auto& he = in->e_header_exprs[elf->get_name()][hpair.first];
3,885✔
4355

4356
            if (!he.che_enabled) {
3,885✔
4357
                continue;
×
4358
            }
4359

4360
            auto* stmt = he.che_stmt.in();
3,885✔
4361

4362
            if (stmt == nullptr) {
3,885✔
4363
                continue;
×
4364
            }
4365
            sqlite3_reset(stmt);
3,885✔
4366
            auto count = sqlite3_bind_parameter_count(stmt);
3,885✔
4367
            for (int lpc = 0; lpc < count; lpc++) {
7,770✔
4368
                const auto* name = sqlite3_bind_parameter_name(stmt, lpc + 1);
3,885✔
4369

4370
                if (name[0] == '$') {
3,885✔
4371
                    const char* env_value;
4372

4373
                    if ((env_value = getenv(&name[1])) != nullptr) {
×
4374
                        sqlite3_bind_text(
×
4375
                            stmt, lpc + 1, env_value, -1, SQLITE_STATIC);
4376
                    }
4377
                    continue;
×
4378
                }
4379
                if (strcmp(name, ":header") == 0) {
3,885✔
4380
                    sqlite3_bind_text(stmt,
3,885✔
4381
                                      lpc + 1,
4382
                                      hexbuf.in(),
3,885✔
4383
                                      hexbuf.size(),
3,885✔
4384
                                      SQLITE_STATIC);
4385
                    continue;
3,885✔
4386
                }
4387
                if (strcmp(name, ":filepath") == 0) {
×
4388
                    sqlite3_bind_text(
×
4389
                        stmt, lpc + 1, filename.c_str(), -1, SQLITE_STATIC);
4390
                    continue;
×
4391
                }
4392
            }
4393

4394
            auto step_res = sqlite3_step(stmt);
3,885✔
4395

4396
            switch (step_res) {
3,885✔
4397
                case SQLITE_OK:
3,885✔
4398
                case SQLITE_DONE:
4399
                    continue;
3,885✔
4400
                case SQLITE_ROW:
×
4401
                    break;
×
4402
                default: {
×
4403
                    log_error(
×
4404
                        "failed to execute file-format header expression: "
4405
                        "%s:%s -- %s",
4406
                        elf->get_name().get(),
4407
                        hpair.first.c_str(),
4408
                        sqlite3_errmsg(in->e_db));
4409
                    he.che_enabled = false;
×
4410
                    continue;
×
4411
                }
4412
            }
4413

4414
            log_info("detected format for: %s -- %s (header-expr: %s)",
×
4415
                     filename.c_str(),
4416
                     elf->get_name().get(),
4417
                     hpair.first.c_str());
4418
            return external_file_format{
×
4419
                elf->get_name().to_string(),
×
4420
                elf->elf_converter.c_command.pp_value,
×
4421
                elf->elf_converter.c_command.pp_location.sl_source.to_string(),
×
4422
            };
4423
        }
4424
    }
65,318✔
4425

4426
    return std::nullopt;
816✔
4427
}
816✔
4428

4429
/**
 * Default implementation that does not examine the sample or add any
 * messages, it always reports that nothing matched.
 *
 * @param sample The sample message (unused).
 * @param msgs The list of diagnostics (unused).
 * @return An empty scan_no_match result.
 */
log_format::scan_result_t
log_format::test_line(sample_t& sample,
                      std::vector<lnav::console::user_message>& msgs)
{
    scan_result_t retval = scan_no_match{};

    return retval;
}
4435

4436
log_format::scan_result_t
4437
external_log_format::test_line(sample_t& sample,
240,446✔
4438
                               std::vector<lnav::console::user_message>& msgs)
4439
{
4440
    auto lines
4441
        = string_fragment::from_str(sample.s_line.pp_value).split_lines();
240,446✔
4442

4443
    if (this->elf_type == elf_type_t::ELF_TYPE_JSON) {
240,446✔
4444
        auto alloc = ArenaAlloc::Alloc<char>{};
3,714✔
4445
        pattern_locks pats;
3,714✔
4446
        auto sbc = scan_batch_context{
3,714✔
4447
            alloc,
4448
            pats,
4449
        };
3,714✔
4450
        sbc.sbc_value_stats.resize(this->elf_value_defs.size());
3,714✔
4451
        std::vector<logline> dst;
3,714✔
4452
        auto li = line_info{
3,714✔
4453
            {0, lines[0].length()},
3,714✔
4454
        };
3,714✔
4455
        shared_buffer sb;
3,714✔
4456
        shared_buffer_ref sbr;
3,714✔
4457
        dst.emplace_back(0, 0us, LEVEL_UNKNOWN);
3,714✔
4458
        sbr.share(sb, lines[0].data(), (size_t) lines[0].length());
3,714✔
4459

4460
        return this->scan_json(dst, li, sbr, sbc);
3,714✔
4461
    }
3,714✔
4462

4463
    scan_result_t retval = scan_no_match{"no patterns matched"};
236,732✔
4464
    auto found = false;
236,732✔
4465

4466
    for (auto pat_iter = this->elf_pattern_order.begin();
236,732✔
4467
         pat_iter != this->elf_pattern_order.end();
1,734,858✔
4468
         ++pat_iter)
1,498,126✔
4469
    {
4470
        auto& pat = *(*pat_iter);
1,498,126✔
4471

4472
        if (!pat.p_pcre.pp_value) {
1,498,126✔
4473
            continue;
1,261,396✔
4474
        }
4475

4476
        auto md = pat.p_pcre.pp_value->create_match_data();
1,498,126✔
4477
        auto match_res = pat.p_pcre.pp_value->capture_from(lines[0])
1,498,126✔
4478
                             .into(md)
1,498,126✔
4479
                             .matches(PCRE2_NO_UTF_CHECK)
2,996,252✔
4480
                             .ignore_error();
1,498,126✔
4481
        if (!match_res) {
1,498,126✔
4482
            continue;
1,261,396✔
4483
        }
4484
        retval = scan_match{1000};
236,730✔
4485
        found = true;
236,730✔
4486

4487
        sample.s_matched_regexes.insert(pat.p_name.to_string());
236,730✔
4488

4489
        const auto ts_cap = md[pat.p_timestamp_field_index];
236,730✔
4490
        const auto level_cap = md[pat.p_level_field_index];
236,730✔
4491
        const char* const* custom_formats = this->get_timestamp_formats();
236,730✔
4492
        date_time_scanner dts;
236,730✔
4493
        timeval tv;
4494
        exttm tm;
236,730✔
4495

4496
        if (ts_cap && ts_cap->sf_begin == 0) {
236,730✔
4497
            pat.p_timestamp_end = ts_cap->sf_end;
146,261✔
4498
        }
4499
        const char* dts_scan_res = nullptr;
236,730✔
4500

4501
        if (ts_cap) {
236,730✔
4502
            dts_scan_res = dts.scan(
236,728✔
4503
                ts_cap->data(), ts_cap->length(), custom_formats, &tm, tv);
236,728✔
4504
        }
4505
        if (dts_scan_res != nullptr) {
236,730✔
4506
            if (dts_scan_res != ts_cap->data() + ts_cap->length()) {
236,727✔
4507
                auto match_len = dts_scan_res - ts_cap->data();
×
4508
                auto notes = attr_line_t("the used timestamp format: ");
×
4509
                if (custom_formats == nullptr) {
×
4510
                    notes.append(PTIMEC_FORMATS[dts.dts_fmt_lock].pf_fmt);
×
4511
                } else {
4512
                    notes.append(custom_formats[dts.dts_fmt_lock]);
×
4513
                }
4514
                notes.append("\n  ")
×
4515
                    .append(ts_cap.value())
×
4516
                    .append("\n")
×
4517
                    .append(2 + match_len, ' ')
×
4518
                    .append("^ matched up to here"_snippet_border);
×
4519
                auto um = lnav::console::user_message::warning(
×
4520
                              attr_line_t("timestamp was not fully matched: ")
×
4521
                                  .append_quoted(ts_cap.value()))
×
4522
                              .with_snippet(sample.s_line.to_snippet())
×
4523
                              .with_note(notes)
×
4524
                              .move();
×
4525

4526
                msgs.emplace_back(um);
×
4527
            }
4528
        } else if (!ts_cap) {
3✔
4529
            msgs.emplace_back(
2✔
4530
                lnav::console::user_message::error(
×
4531
                    attr_line_t("invalid sample log message: ")
4✔
4532
                        .append(lnav::to_json(sample.s_line.pp_value)))
4✔
4533
                    .with_reason(attr_line_t("timestamp was not captured"))
4✔
4534
                    .with_snippet(sample.s_line.to_snippet())
4✔
4535
                    .with_help(attr_line_t(
4✔
4536
                        "A timestamp needs to be captured in order for a "
4537
                        "line to be recognized as a log message")));
4538
        } else {
4539
            attr_line_t notes;
1✔
4540

4541
            if (custom_formats == nullptr) {
1✔
4542
                notes.append("the following built-in formats were tried:");
×
4543
                for (int lpc = 0; PTIMEC_FORMATS[lpc].pf_fmt != nullptr; lpc++)
×
4544
                {
4545
                    off_t off = 0;
×
4546

4547
                    PTIMEC_FORMATS[lpc].pf_func(
×
4548
                        &tm, ts_cap->data(), off, ts_cap->length());
×
4549
                    notes.append("\n  ")
×
4550
                        .append(ts_cap.value())
×
4551
                        .append("\n")
×
4552
                        .append(2 + off, ' ')
×
4553
                        .append("^ "_snippet_border)
×
4554
                        .append_quoted(
×
4555
                            lnav::roles::symbol(PTIMEC_FORMATS[lpc].pf_fmt))
×
4556
                        .append(" matched up to here"_snippet_border);
×
4557
                }
4558
            } else {
4559
                notes.append("the following custom formats were tried:");
1✔
4560
                for (int lpc = 0; custom_formats[lpc] != nullptr; lpc++) {
2✔
4561
                    off_t off = 0;
1✔
4562

4563
                    ptime_fmt(custom_formats[lpc],
1✔
4564
                              &tm,
4565
                              ts_cap->data(),
4566
                              off,
4567
                              ts_cap->length());
1✔
4568
                    notes.append("\n  ")
1✔
4569
                        .append(ts_cap.value())
1✔
4570
                        .append("\n")
1✔
4571
                        .append(2 + off, ' ')
1✔
4572
                        .append("^ "_snippet_border)
1✔
4573
                        .append_quoted(lnav::roles::symbol(custom_formats[lpc]))
2✔
4574
                        .append(" matched up to here"_snippet_border);
1✔
4575
                }
4576
            }
4577

4578
            msgs.emplace_back(
1✔
4579
                lnav::console::user_message::error(
×
4580
                    attr_line_t("invalid sample log message: ")
1✔
4581
                        .append(lnav::to_json(sample.s_line.pp_value)))
2✔
4582
                    .with_reason(attr_line_t("unrecognized timestamp -- ")
2✔
4583
                                     .append(ts_cap.value()))
1✔
4584
                    .with_snippet(sample.s_line.to_snippet())
2✔
4585
                    .with_note(notes)
1✔
4586
                    .with_help(attr_line_t("If the timestamp format is not "
2✔
4587
                                           "supported by default, you can "
4588
                                           "add a custom format with the ")
4589
                                   .append_quoted("timestamp-format"_symbol)
1✔
4590
                                   .append(" property")));
1✔
4591
        }
1✔
4592

4593
        auto level = this->convert_level(
236,730✔
4594
            level_cap.value_or(string_fragment::invalid()), nullptr);
236,730✔
4595

4596
        if (sample.s_level != LEVEL_UNKNOWN && sample.s_level != level) {
236,730✔
4597
            attr_line_t note_al;
1✔
4598

4599
            note_al.append("matched regex = ")
1✔
4600
                .append(lnav::roles::symbol(pat.p_name.to_string()))
2✔
4601
                .append("\n")
1✔
4602
                .append("captured level = ")
1✔
4603
                .append_quoted(level_cap->to_string());
1✔
4604
            if (level_cap && !this->elf_level_patterns.empty()) {
1✔
4605
                thread_local auto md = lnav::pcre2pp::match_data::unitialized();
1✔
4606

4607
                note_al.append("\nlevel regular expression match results:");
1✔
4608
                for (const auto& level_pattern : this->elf_level_patterns) {
3✔
4609
                    attr_line_t regex_al
4610
                        = level_pattern.second.lp_pcre.pp_value->get_pattern();
2✔
4611
                    lnav::snippets::regex_highlighter(
2✔
4612
                        regex_al, -1, line_range{0, (int) regex_al.length()});
2✔
4613
                    note_al.append("\n  ")
2✔
4614
                        .append(lnav::roles::symbol(
4✔
4615
                            level_pattern.second.lp_pcre.pp_path.to_string()))
4✔
4616
                        .append(" = ")
2✔
4617
                        .append(regex_al)
2✔
4618
                        .append("\n    ");
2✔
4619
                    auto match_res = level_pattern.second.lp_pcre.pp_value
2✔
4620
                                         ->capture_from(level_cap.value())
2✔
4621
                                         .into(md)
2✔
4622
                                         .matches(PCRE2_NO_UTF_CHECK)
4✔
4623
                                         .ignore_error();
2✔
4624
                    if (!match_res) {
2✔
4625
                        note_al.append(lnav::roles::warning("no match"));
1✔
4626
                        continue;
1✔
4627
                    }
4628

4629
                    note_al.append(level_cap.value())
1✔
4630
                        .append("\n    ")
1✔
4631
                        .append(md.leading().length(), ' ')
1✔
4632
                        .append("^"_snippet_border);
1✔
4633
                    if (match_res->f_all.length() > 2) {
1✔
4634
                        note_al.append(lnav::roles::snippet_border(
1✔
4635
                            std::string(match_res->f_all.length() - 2, '-')));
3✔
4636
                    }
4637
                    if (match_res->f_all.length() > 1) {
1✔
4638
                        note_al.append("^"_snippet_border);
1✔
4639
                    }
4640
                }
2✔
4641
            }
4642
            auto um
4643
                = lnav::console::user_message::error(
×
4644
                      attr_line_t("invalid sample log message: ")
1✔
4645
                          .append(lnav::to_json(sample.s_line.pp_value)))
2✔
4646
                      .with_reason(attr_line_t()
2✔
4647
                                       .append_quoted(lnav::roles::symbol(
2✔
4648
                                           level_names[level]))
1✔
4649
                                       .append(" does not match the expected "
1✔
4650
                                               "level of ")
4651
                                       .append_quoted(lnav::roles::symbol(
2✔
4652
                                           level_names[sample.s_level])))
1✔
4653
                      .with_snippet(sample.s_line.to_snippet())
2✔
4654
                      .with_note(note_al)
1✔
4655
                      .move();
1✔
4656
            if (!this->elf_level_patterns.empty()) {
1✔
4657
                um.with_help(
1✔
4658
                    attr_line_t("Level regexes are not anchored to the "
2✔
4659
                                "start/end of the string.  Prepend ")
4660
                        .append_quoted("^"_symbol)
1✔
4661
                        .append(" to the expression to match from the "
1✔
4662
                                "start of the string and append ")
4663
                        .append_quoted("$"_symbol)
1✔
4664
                        .append(" to match up to the end of the string."));
1✔
4665
            }
4666
            msgs.emplace_back(um);
1✔
4667
        }
1✔
4668

4669
        {
4670
            auto full_match_res
4671
                = pat.p_pcre.pp_value->capture_from(sample.s_line.pp_value)
236,730✔
4672
                      .into(md)
236,730✔
4673
                      .matches()
473,460✔
4674
                      .ignore_error();
236,730✔
4675
            if (!full_match_res) {
236,730✔
4676
                attr_line_t regex_al = pat.p_pcre.pp_value->get_pattern();
1✔
4677
                lnav::snippets::regex_highlighter(
1✔
4678
                    regex_al, -1, line_range{0, (int) regex_al.length()});
1✔
4679
                msgs.emplace_back(
1✔
4680
                    lnav::console::user_message::error(
×
4681
                        attr_line_t("invalid pattern: ")
1✔
4682
                            .append_quoted(
1✔
4683
                                lnav::roles::symbol(pat.p_name.to_string())))
2✔
4684
                        .with_reason("pattern does not match entire "
2✔
4685
                                     "multiline sample message")
4686
                        .with_snippet(sample.s_line.to_snippet())
2✔
4687
                        .with_note(attr_line_t()
2✔
4688
                                       .append(lnav::roles::symbol(
1✔
4689
                                           pat.p_name.to_string()))
2✔
4690
                                       .append(" = ")
1✔
4691
                                       .append(regex_al))
1✔
4692
                        .with_help(
4693
                            attr_line_t("use ").append_quoted(".*").append(
2✔
4694
                                " to match new-lines")));
4695
            } else if (static_cast<size_t>(full_match_res->f_all.length())
236,730✔
4696
                       != sample.s_line.pp_value.length())
236,729✔
4697
            {
4698
                attr_line_t regex_al = pat.p_pcre.pp_value->get_pattern();
1✔
4699
                lnav::snippets::regex_highlighter(
1✔
4700
                    regex_al, -1, line_range{0, (int) regex_al.length()});
1✔
4701
                auto match_length
4702
                    = static_cast<size_t>(full_match_res->f_all.length());
1✔
4703
                attr_line_t sample_al = sample.s_line.pp_value;
1✔
4704
                sample_al.append("\n")
1✔
4705
                    .append(match_length, ' ')
1✔
4706
                    .append("^ matched up to here"_error)
1✔
4707
                    .with_attr_for_all(VC_ROLE.value(role_t::VCR_QUOTED_CODE));
1✔
4708
                auto sample_snippet = lnav::console::snippet::from(
4709
                    sample.s_line.pp_location, sample_al);
1✔
4710
                msgs.emplace_back(
1✔
4711
                    lnav::console::user_message::error(
×
4712
                        attr_line_t("invalid pattern: ")
1✔
4713
                            .append_quoted(
1✔
4714
                                lnav::roles::symbol(pat.p_name.to_string())))
2✔
4715
                        .with_reason("pattern does not match entire "
2✔
4716
                                     "message")
4717
                        .with_snippet(sample_snippet)
1✔
4718
                        .with_note(attr_line_t()
3✔
4719
                                       .append(lnav::roles::symbol(
2✔
4720
                                           pat.p_name.to_string()))
2✔
4721
                                       .append(" = ")
1✔
4722
                                       .append(regex_al))
1✔
4723
                        .with_help("update the regular expression to fully "
4724
                                   "capture the sample message"));
4725
            }
1✔
4726
        }
4727
    }
1,498,126✔
4728

4729
    if (!found && !this->elf_pattern_order.empty()) {
236,732✔
4730
        std::vector<std::pair<ssize_t, intern_string_t>> partial_indexes;
2✔
4731
        attr_line_t notes;
2✔
4732
        size_t max_name_width = 0;
2✔
4733

4734
        for (const auto& pat_iter : this->elf_pattern_order) {
10✔
4735
            auto& pat = *pat_iter;
8✔
4736

4737
            if (!pat.p_pcre.pp_value) {
8✔
4738
                continue;
×
4739
            }
4740

4741
            partial_indexes.emplace_back(
8✔
4742
                pat.p_pcre.pp_value->match_partial(lines[0]), pat.p_name);
8✔
4743
            max_name_width = std::max(max_name_width, pat.p_name.size());
8✔
4744
        }
4745
        for (const auto& line_frag : lines) {
4✔
4746
            auto src_line = attr_line_t(line_frag.to_string());
2✔
4747
            if (!line_frag.endswith("\n")) {
2✔
4748
                src_line.append("\n");
2✔
4749
            }
4750
            src_line.with_attr_for_all(VC_ROLE.value(role_t::VCR_QUOTED_CODE));
2✔
4751
            notes.append("   ").append(src_line);
2✔
4752
            for (auto& part_pair : partial_indexes) {
10✔
4753
                if (part_pair.first >= 0
16✔
4754
                    && part_pair.first < line_frag.length())
8✔
4755
                {
4756
                    notes.append("   ")
8✔
4757
                        .append(part_pair.first, ' ')
8✔
4758
                        .append("^ "_snippet_border)
8✔
4759
                        .append(
8✔
4760
                            lnav::roles::symbol(part_pair.second.to_string()))
16✔
4761
                        .append(" matched up to here"_snippet_border)
8✔
4762
                        .append("\n");
8✔
4763
                }
4764
                part_pair.first -= line_frag.length();
8✔
4765
            }
4766
        }
2✔
4767
        notes.add_header(
2✔
4768
            "the following shows how each pattern matched this sample:\n");
4769

4770
        attr_line_t regex_note;
2✔
4771
        for (const auto& pat_iter : this->elf_pattern_order) {
10✔
4772
            if (!pat_iter->p_pcre.pp_value) {
8✔
4773
                regex_note
4774
                    .append(lnav::roles::symbol(fmt::format(
×
4775
                        FMT_STRING("{:{}}"), pat_iter->p_name, max_name_width)))
×
4776
                    .append(" is invalid");
×
4777
                continue;
×
4778
            }
4779

4780
            attr_line_t regex_al = pat_iter->p_pcre.pp_value->get_pattern();
8✔
4781
            lnav::snippets::regex_highlighter(
8✔
4782
                regex_al, -1, line_range{0, (int) regex_al.length()});
8✔
4783

4784
            regex_note
4785
                .append(lnav::roles::symbol(fmt::format(
16✔
4786
                    FMT_STRING("{:{}}"), pat_iter->p_name, max_name_width)))
24✔
4787
                .append(" = ")
8✔
4788
                .append_quoted(regex_al)
16✔
4789
                .append("\n");
8✔
4790
        }
8✔
4791

4792
        msgs.emplace_back(
2✔
4793
            lnav::console::user_message::error(
×
4794
                attr_line_t("invalid sample log message: ")
2✔
4795
                    .append(lnav::to_json(sample.s_line.pp_value)))
4✔
4796
                .with_reason("sample does not match any patterns")
4✔
4797
                .with_snippet(sample.s_line.to_snippet())
4✔
4798
                .with_note(notes.rtrim())
4✔
4799
                .with_note(regex_note));
4800
    }
2✔
4801

4802
    return retval;
236,732✔
4803
}
240,446✔
4804

4805
void
4806
external_log_format::build(std::vector<lnav::console::user_message>& errors)
69,322✔
4807
{
4808
    auto& vc = view_colors::singleton();
69,322✔
4809

4810
    if (!this->lf_timestamp_field.empty()) {
69,322✔
4811
        auto& vd = this->elf_value_defs[this->lf_timestamp_field];
69,322✔
4812
        if (vd.get() == nullptr) {
69,322✔
4813
            vd = std::make_shared<value_def>(
49,498✔
4814
                this->lf_timestamp_field,
49,498✔
4815
                value_kind_t::VALUE_TEXT,
×
4816
                logline_value_meta::internal_column{},
×
4817
                this);
49,498✔
4818
            if (this->elf_type == elf_type_t::ELF_TYPE_JSON) {
49,498✔
4819
                this->elf_value_def_order.emplace_back(vd);
8,914✔
4820
            }
4821
        }
4822
        vd->vd_meta.lvm_name = this->lf_timestamp_field;
69,322✔
4823
        vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT;
69,322✔
4824
        vd->vd_meta.lvm_column = logline_value_meta::internal_column{};
69,322✔
4825
        vd->vd_internal = true;
69,322✔
4826

4827
        this->elf_value_defs[LOG_TIME_STR] = vd;
69,322✔
4828
    }
4829

4830
    if (!this->lf_subsecond_field.empty()) {
69,322✔
4831
        if (!this->lf_subsecond_unit.has_value()) {
113✔
4832
            errors.emplace_back(
1✔
4833
                lnav::console::user_message::error(
×
4834
                    attr_line_t()
2✔
4835
                        .append_quoted(
1✔
4836
                            lnav::roles::symbol(this->elf_name.to_string()))
2✔
4837
                        .append(" is not a valid log format"))
1✔
4838
                    .with_reason(attr_line_t()
2✔
4839
                                     .append_quoted("subsecond-units"_symbol)
1✔
4840
                                     .append(" must be set when ")
1✔
4841
                                     .append_quoted("subsecond-field"_symbol)
1✔
4842
                                     .append(" is used"))
1✔
4843
                    .with_snippets(this->get_snippets()));
2✔
4844
        } else {
4845
            auto& vd = this->elf_value_defs[this->lf_subsecond_field];
112✔
4846
            if (vd.get() == nullptr) {
112✔
4847
                vd = std::make_shared<value_def>(
112✔
4848
                    this->lf_subsecond_field,
112✔
4849
                    value_kind_t::VALUE_INTEGER,
×
4850
                    logline_value_meta::internal_column{},
×
4851
                    this);
112✔
4852
                if (this->elf_type == elf_type_t::ELF_TYPE_JSON) {
112✔
4853
                    this->elf_value_def_order.emplace_back(vd);
112✔
4854
                }
4855
            }
4856
            vd->vd_meta.lvm_name = this->lf_subsecond_field;
112✔
4857
            vd->vd_meta.lvm_column = logline_value_meta::internal_column{};
112✔
4858
            vd->vd_meta.lvm_hidden = true;
112✔
4859
            vd->vd_internal = true;
112✔
4860
        }
4861
    }
4862

4863
    if (startswith(this->elf_level_field.get(), "/")) {
69,322✔
4864
        this->elf_level_field
4865
            = intern_string::lookup(this->elf_level_field.get() + 1);
224✔
4866
    }
4867
    if (!this->elf_level_field.empty()) {
69,322✔
4868
        auto level_iter = this->elf_value_defs.find(this->elf_level_field);
69,322✔
4869
        if (level_iter == this->elf_value_defs.end()) {
69,322✔
4870
            auto& vd = this->elf_value_defs[this->elf_level_field];
31,641✔
4871
            vd = std::make_shared<value_def>(
31,641✔
4872
                this->elf_level_field,
31,641✔
4873
                value_kind_t::VALUE_TEXT,
×
4874
                logline_value_meta::internal_column{},
×
4875
                this);
31,641✔
4876
            if (this->elf_type == elf_type_t::ELF_TYPE_JSON) {
31,641✔
4877
                this->elf_value_def_order.emplace_back(vd);
3,234✔
4878
            }
4879
            vd->vd_meta.lvm_name = this->elf_level_field;
31,641✔
4880
            vd->vd_meta.lvm_column = logline_value_meta::internal_column{};
31,641✔
4881
            vd->vd_internal = true;
31,641✔
4882

4883
            if (this->elf_level_field != this->elf_body_field) {
31,641✔
4884
                this->elf_value_defs[LOG_LEVEL_STR] = vd;
30,713✔
4885
            }
4886
        } else {
4887
            if (level_iter->second->vd_meta.lvm_kind
37,681✔
4888
                != value_kind_t::VALUE_TEXT)
37,681✔
4889
            {
4890
                this->lf_level_hideable = false;
6,608✔
4891
            }
4892
            this->elf_value_defs[LOG_LEVEL_STR] = level_iter->second;
37,681✔
4893
        }
4894
    }
4895

4896
    auto opid_field_iter = this->elf_value_defs.find(LOG_OPID_STR);
69,322✔
4897
    if (opid_field_iter == this->elf_value_defs.end()) {
69,322✔
4898
        auto vd
4899
            = std::make_shared<value_def>(this->elf_opid_field,
69,322✔
4900
                                          value_kind_t::VALUE_TEXT,
×
4901
                                          logline_value_meta::internal_column{},
×
4902
                                          this);
69,322✔
4903
        if (this->elf_type == elf_type_t::ELF_TYPE_JSON) {
69,322✔
4904
            this->elf_value_def_order.emplace_back(vd);
13,666✔
4905
        }
4906
        vd->vd_meta.lvm_name = LOG_OPID_STR;
69,322✔
4907
        vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT;
69,322✔
4908
        vd->vd_meta.lvm_column = logline_value_meta::internal_column{};
69,322✔
4909
        vd->vd_internal = true;
69,322✔
4910

4911
        this->elf_value_defs[LOG_OPID_STR] = vd;
69,322✔
4912
    }
69,322✔
4913

4914
    if (!this->elf_body_field.empty()) {
69,322✔
4915
        auto& vd = this->elf_value_defs[this->elf_body_field];
69,322✔
4916
        if (vd.get() == nullptr) {
69,322✔
4917
            vd = std::make_shared<value_def>(
54,025✔
4918
                this->elf_body_field,
54,025✔
4919
                value_kind_t::VALUE_TEXT,
×
4920
                logline_value_meta::internal_column{},
×
4921
                this);
54,025✔
4922
            if (this->elf_type == elf_type_t::ELF_TYPE_JSON) {
54,025✔
4923
                this->elf_value_def_order.emplace_back(vd);
7,985✔
4924
            }
4925
        }
4926
        vd->vd_meta.lvm_name = this->elf_body_field;
69,322✔
4927
        vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT;
69,322✔
4928
        vd->vd_meta.lvm_column = logline_value_meta::internal_column{};
69,322✔
4929
        vd->vd_internal = true;
69,322✔
4930
    }
4931

4932
    if (!this->elf_src_file_field.empty()) {
69,322✔
4933
        auto& vd = this->elf_value_defs[this->elf_src_file_field];
9,393✔
4934
        if (vd.get() == nullptr) {
9,393✔
4935
            vd = std::make_shared<value_def>(
1✔
4936
                this->elf_src_file_field,
1✔
4937
                value_kind_t::VALUE_TEXT,
×
4938
                logline_value_meta::internal_column{},
×
4939
                this);
2✔
4940
        }
4941
        vd->vd_meta.lvm_name = this->elf_src_file_field;
9,393✔
4942
        vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT;
9,393✔
4943
        vd->vd_meta.lvm_column = logline_value_meta::internal_column{};
9,393✔
4944
    }
4945

4946
    if (!this->elf_src_line_field.empty()) {
69,322✔
4947
        auto& vd = this->elf_value_defs[this->elf_src_line_field];
9,393✔
4948
        if (vd.get() == nullptr) {
9,393✔
4949
            vd = std::make_shared<value_def>(
1✔
4950
                this->elf_src_line_field,
1✔
4951
                value_kind_t::VALUE_INTEGER,
×
4952
                logline_value_meta::internal_column{},
×
4953
                this);
2✔
4954
        }
4955
        vd->vd_meta.lvm_name = this->elf_src_line_field;
9,393✔
4956
        vd->vd_meta.lvm_kind = value_kind_t::VALUE_INTEGER;
9,393✔
4957
        vd->vd_meta.lvm_column = logline_value_meta::internal_column{};
9,393✔
4958
    }
4959

4960
    if (!this->elf_thread_id_field.empty()) {
69,322✔
4961
        auto& vd = this->elf_value_defs[this->elf_thread_id_field];
21,457✔
4962
        if (vd.get() == nullptr) {
21,457✔
4963
            vd = std::make_shared<value_def>(
1✔
4964
                this->elf_thread_id_field,
1✔
4965
                value_kind_t::VALUE_TEXT,
×
4966
                logline_value_meta::internal_column{},
×
4967
                this);
2✔
4968
        }
4969
        vd->vd_meta.lvm_name = this->elf_thread_id_field;
21,457✔
4970
        vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT;
21,457✔
4971
        vd->vd_meta.lvm_column = logline_value_meta::internal_column{};
21,457✔
4972
    }
4973

4974
    if (!this->elf_duration_field.empty()) {
69,322✔
4975
        auto& vd = this->elf_value_defs[this->elf_duration_field];
4,864✔
4976
        if (vd.get() == nullptr) {
4,864✔
4977
            vd = std::make_shared<value_def>(
×
4978
                this->elf_duration_field,
×
4979
                value_kind_t::VALUE_FLOAT,
×
4980
                logline_value_meta::internal_column{},
×
4981
                this);
×
4982
        }
4983
        vd->vd_meta.lvm_name = this->elf_duration_field;
4,864✔
4984
        vd->vd_meta.lvm_kind = value_kind_t::VALUE_FLOAT;
4,864✔
4985
        vd->vd_meta.lvm_column = logline_value_meta::internal_column{};
4,864✔
4986
    }
4987

4988
    for (auto& od_pair : *this->lf_opid_description_def) {
99,132✔
4989
        od_pair.second.od_name = od_pair.first;
29,810✔
4990
        od_pair.second.od_index = this->lf_opid_description_def_vec->size();
29,810✔
4991
        this->lf_opid_description_def_vec->emplace_back(&od_pair.second);
29,810✔
4992
    }
4993

4994
    for (auto& od_pair : *this->lf_subid_description_def) {
70,250✔
4995
        od_pair.second.od_name = od_pair.first;
928✔
4996
        od_pair.second.od_index = this->lf_subid_description_def_vec->size();
928✔
4997
        this->lf_subid_description_def_vec->emplace_back(&od_pair.second);
928✔
4998
    }
4999

5000
    if (!this->lf_timestamp_format.empty()) {
69,322✔
5001
        this->lf_timestamp_format.push_back(nullptr);
8,468✔
5002
    }
5003
    auto src_file_found = 0;
69,322✔
5004
    auto src_line_found = 0;
69,322✔
5005
    auto thread_id_found = 0;
69,322✔
5006
    auto duration_found = 0;
69,322✔
5007
    for (auto& elf_pattern : this->elf_patterns) {
194,470✔
5008
        auto& pat = *elf_pattern.second;
125,148✔
5009

5010
        if (pat.p_pcre.pp_value == nullptr) {
125,148✔
5011
            continue;
1✔
5012
        }
5013

5014
        if (pat.p_opid_field_index == -1
250,294✔
5015
            && this->lf_opid_source.value_or(opid_source_t::from_description)
125,147✔
5016
                == opid_source_t::from_description
5017
            && this->lf_opid_description_def->size() == 1)
250,294✔
5018
        {
5019
            const auto& opid_def
5020
                = this->lf_opid_description_def->begin()->second;
30,624✔
5021
            for (const auto& desc : *opid_def.od_descriptors) {
68,672✔
5022
                for (auto named_cap : pat.p_pcre.pp_value->get_named_captures())
443,584✔
5023
                {
5024
                    const intern_string_t name
5025
                        = intern_string::lookup(named_cap.get_name());
405,536✔
5026

5027
                    if (name == desc.od_field.pp_value) {
405,536✔
5028
                        pat.p_opid_description_field_indexes.emplace_back(
70,528✔
5029
                            named_cap.get_index());
35,264✔
5030
                    }
5031
                }
5032
            }
5033
        }
5034

5035
        for (auto named_cap : pat.p_pcre.pp_value->get_named_captures()) {
1,066,178✔
5036
            const intern_string_t name
5037
                = intern_string::lookup(named_cap.get_name());
941,031✔
5038

5039
            if (name == this->lf_timestamp_field) {
941,031✔
5040
                pat.p_timestamp_field_index = named_cap.get_index();
125,145✔
5041
            }
5042
            if (name == this->lf_time_field) {
941,031✔
5043
                pat.p_time_field_index = named_cap.get_index();
928✔
5044
            }
5045
            if (name == this->elf_level_field) {
941,031✔
5046
                pat.p_level_field_index = named_cap.get_index();
95,218✔
5047
            }
5048
            if (name == this->elf_opid_field) {
941,031✔
5049
                pat.p_opid_field_index = named_cap.get_index();
25,984✔
5050
            }
5051
            if (name == this->elf_subid_field) {
941,031✔
5052
                pat.p_subid_field_index = named_cap.get_index();
13,920✔
5053
            }
5054
            if (name == this->elf_body_field) {
941,031✔
5055
                pat.p_body_field_index = named_cap.get_index();
108,441✔
5056
            }
5057
            if (name == this->elf_src_file_field) {
941,031✔
5058
                pat.p_src_file_field_index = named_cap.get_index();
14,848✔
5059
                src_file_found += 1;
14,848✔
5060
            }
5061
            if (name == this->elf_src_line_field) {
941,031✔
5062
                pat.p_src_line_field_index = named_cap.get_index();
16,704✔
5063
                src_line_found += 1;
16,704✔
5064
            }
5065
            if (name == this->elf_thread_id_field) {
941,031✔
5066
                pat.p_thread_id_field_index = named_cap.get_index();
51,968✔
5067
                thread_id_found += 1;
51,968✔
5068
            }
5069
            if (name == this->elf_duration_field) {
941,031✔
5070
                pat.p_duration_field_index = named_cap.get_index();
6,496✔
5071
                duration_found += 1;
6,496✔
5072
            }
5073
            if (name == this->lf_start_timestamp_field) {
941,031✔
5074
                pat.p_start_timestamp_field_index = named_cap.get_index();
×
5075
            }
5076

5077
            auto value_iter = this->elf_value_defs.find(name);
941,031✔
5078
            if (value_iter != this->elf_value_defs.end()) {
941,031✔
5079
                auto vd = value_iter->second;
926,997✔
5080
                indexed_value_def ivd;
926,997✔
5081

5082
                ivd.ivd_index = named_cap.get_index();
926,997✔
5083
                if (!vd->vd_unit_field.empty()) {
926,997✔
5084
                    ivd.ivd_unit_field_index = pat.p_pcre.pp_value->name_index(
×
5085
                        vd->vd_unit_field.get());
×
5086
                } else {
5087
                    ivd.ivd_unit_field_index = -1;
926,997✔
5088
                }
5089
                if (!vd->vd_internal
926,997✔
5090
                    && !vd->vd_meta.lvm_column
1,590,263✔
5091
                            .is<logline_value_meta::table_column>())
663,266✔
5092
                {
5093
                    vd->vd_meta.lvm_column = logline_value_meta::table_column{
372,802✔
5094
                        this->elf_column_count++};
372,802✔
5095
                }
5096
                ivd.ivd_value_def = vd;
926,997✔
5097
                pat.p_value_by_index.push_back(ivd);
926,997✔
5098
            }
926,997✔
5099
            pat.p_value_name_to_index[name] = named_cap.get_index();
941,031✔
5100
        }
5101

5102
        stable_sort(pat.p_value_by_index.begin(), pat.p_value_by_index.end());
125,147✔
5103

5104
        for (int lpc = 0; lpc < (int) pat.p_value_by_index.size(); lpc++) {
1,052,144✔
5105
            auto& ivd = pat.p_value_by_index[lpc];
926,997✔
5106
            auto vd = ivd.ivd_value_def;
926,997✔
5107

5108
            if (!vd->vd_meta.lvm_foreign_key && !vd->vd_meta.lvm_identifier) {
926,997✔
5109
                switch (vd->vd_meta.lvm_kind) {
474,724✔
5110
                    case value_kind_t::VALUE_INTEGER:
64,144✔
5111
                    case value_kind_t::VALUE_FLOAT:
5112
                        pat.p_numeric_value_indexes.push_back(lpc);
64,144✔
5113
                        break;
64,144✔
5114
                    default:
410,580✔
5115
                        break;
410,580✔
5116
                }
5117
            }
5118
        }
926,997✔
5119

5120
        if (pat.p_timestamp_field_index == -1) {
125,147✔
5121
            errors.emplace_back(
2✔
5122
                lnav::console::user_message::error(
×
5123
                    attr_line_t("invalid pattern: ")
4✔
5124
                        .append_quoted(lnav::roles::symbol(pat.p_config_path)))
4✔
5125
                    .with_reason("no timestamp capture found in the pattern")
4✔
5126
                    .with_snippets(this->get_snippets())
4✔
5127
                    .with_help("all log messages need a timestamp"));
5128
        }
5129

5130
        if (!this->elf_level_field.empty() && pat.p_level_field_index == -1) {
125,147✔
5131
            log_warning("%s:level field '%s' not found in pattern",
29,929✔
5132
                        pat.p_config_path.c_str(),
5133
                        this->elf_level_field.get());
5134
        }
5135
        if (!this->elf_body_field.empty() && pat.p_body_field_index == -1) {
125,147✔
5136
            log_warning("%s:body field '%s' not found in pattern",
16,706✔
5137
                        pat.p_config_path.c_str(),
5138
                        this->elf_body_field.get());
5139
        }
5140

5141
        this->elf_pattern_order.push_back(elf_pattern.second);
125,147✔
5142
    }
5143
    if (this->elf_type == elf_type_t::ELF_TYPE_TEXT
138,644✔
5144
        && !this->elf_src_file_field.empty() && src_file_found == 0)
69,322✔
5145
    {
5146
        errors.emplace_back(
1✔
5147
            lnav::console::user_message::error(
×
5148
                attr_line_t("invalid pattern: ")
2✔
5149
                    .append_quoted(
1✔
5150
                        lnav::roles::symbol(this->elf_name.to_string())))
2✔
5151
                .with_reason("no source file capture found in the pattern")
2✔
5152
                .with_snippets(this->get_snippets())
2✔
5153
                .with_help(attr_line_t("at least one pattern needs a source "
2✔
5154
                                       "file capture named ")
5155
                               .append_quoted(this->elf_src_file_field.get())));
1✔
5156
    }
5157
    if (this->elf_type == elf_type_t::ELF_TYPE_TEXT
138,644✔
5158
        && !this->elf_src_line_field.empty() && src_line_found == 0)
69,322✔
5159
    {
5160
        errors.emplace_back(
1✔
5161
            lnav::console::user_message::error(
×
5162
                attr_line_t("invalid pattern: ")
2✔
5163
                    .append_quoted(
1✔
5164
                        lnav::roles::symbol(this->elf_name.to_string())))
2✔
5165
                .with_reason("no source line capture found in the pattern")
2✔
5166
                .with_snippets(this->get_snippets())
2✔
5167
                .with_help(attr_line_t("at least one pattern needs a source "
2✔
5168
                                       "line capture named ")
5169
                               .append_quoted(this->elf_src_line_field.get())));
1✔
5170
    }
5171
    if (this->elf_type == elf_type_t::ELF_TYPE_TEXT
138,644✔
5172
        && !this->elf_thread_id_field.empty() && thread_id_found == 0)
69,322✔
5173
    {
5174
        errors.emplace_back(
1✔
5175
            lnav::console::user_message::error(
×
5176
                attr_line_t("invalid pattern: ")
2✔
5177
                    .append_quoted(
1✔
5178
                        lnav::roles::symbol(this->elf_name.to_string())))
2✔
5179
                .with_reason("no thread ID capture found in the pattern")
2✔
5180
                .with_snippets(this->get_snippets())
2✔
5181
                .with_help(
5182
                    attr_line_t(
2✔
5183
                        "at least one pattern needs a thread ID capture named ")
5184
                        .append_quoted(this->elf_thread_id_field.get())));
1✔
5185
    }
5186
    if (this->elf_type == elf_type_t::ELF_TYPE_TEXT
138,644✔
5187
        && !this->elf_duration_field.empty() && duration_found == 0)
69,322✔
5188
    {
5189
        errors.emplace_back(
×
5190
            lnav::console::user_message::error(
×
5191
                attr_line_t("invalid pattern: ")
×
5192
                    .append_quoted(
×
5193
                        lnav::roles::symbol(this->elf_name.to_string())))
×
5194
                .with_reason("no duration capture found in the pattern")
×
5195
                .with_snippets(this->get_snippets())
×
5196
                .with_help(
5197
                    attr_line_t(
×
5198
                        "at least one pattern needs a duration capture named ")
5199
                        .append_quoted(this->elf_duration_field.get())));
×
5200
    }
5201

5202
    if (this->elf_type != elf_type_t::ELF_TYPE_TEXT) {
69,322✔
5203
        if (!this->elf_patterns.empty()) {
14,818✔
5204
            errors.emplace_back(
1✔
5205
                lnav::console::user_message::error(
×
5206
                    attr_line_t()
2✔
5207
                        .append_quoted(
1✔
5208
                            lnav::roles::symbol(this->elf_name.to_string()))
2✔
5209
                        .append(" is not a valid log format"))
1✔
5210
                    .with_reason("structured logs cannot have regexes")
2✔
5211
                    .with_snippets(this->get_snippets()));
2✔
5212
        }
5213
        if (this->elf_type == elf_type_t::ELF_TYPE_JSON) {
14,818✔
5214
            this->lf_multiline = true;
13,666✔
5215
            this->lf_structured = true;
13,666✔
5216
            this->lf_formatted_lines = true;
13,666✔
5217
            this->jlf_parse_context
5218
                = std::make_shared<yajlpp_parse_context>(this->elf_name);
13,666✔
5219
            this->jlf_yajl_handle.reset(
13,666✔
5220
                yajl_alloc(&this->jlf_parse_context->ypc_callbacks,
13,666✔
5221
                           nullptr,
5222
                           this->jlf_parse_context.get()),
13,666✔
5223
                yajl_handle_deleter());
5224
            yajl_config(
13,666✔
5225
                this->jlf_yajl_handle.get(), yajl_dont_validate_strings, 1);
5226
        } else if (this->elf_type == elf_type_t::ELF_TYPE_TABULAR) {
1,152✔
5227
            this->lf_structured = true;
1,152✔
5228
            if (!this->jlf_line_format.empty()) {
1,152✔
5229
                // The format owns its rendering via line-format.
5230
                this->lf_formatted_lines = true;
928✔
5231
            }
5232
        }
5233
    } else {
5234
        if (this->elf_patterns.empty()) {
54,504✔
5235
            errors.emplace_back(lnav::console::user_message::error(
2✔
5236
                                    attr_line_t()
4✔
5237
                                        .append_quoted(lnav::roles::symbol(
4✔
5238
                                            this->elf_name.to_string()))
4✔
5239
                                        .append(" is not a valid log format"))
2✔
5240
                                    .with_reason("no regexes specified")
4✔
5241
                                    .with_snippets(this->get_snippets()));
4✔
5242
        }
5243
    }
5244

5245
    stable_sort(this->elf_level_pairs.begin(), this->elf_level_pairs.end());
69,322✔
5246

5247
    {
5248
        safe::WriteAccess<safe_format_header_expressions> hexprs(
5249
            format_header_exprs);
69,322✔
5250

5251
        if (hexprs->e_db.in() == nullptr) {
69,322✔
5252
            if (sqlite3_open(":memory:", hexprs->e_db.out()) != SQLITE_OK) {
928✔
5253
                log_error("unable to open memory DB");
×
5254
                return;
×
5255
            }
5256
            register_sqlite_funcs(hexprs->e_db.in(), sqlite_registration_funcs);
928✔
5257
        }
5258

5259
        for (const auto& hpair : this->elf_converter.c_header.h_exprs.he_exprs)
73,963✔
5260
        {
5261
            auto stmt_str
5262
                = fmt::format(FMT_STRING("SELECT 1 WHERE {}"), hpair.second);
13,923✔
5263
            compiled_header_expr che;
4,641✔
5264

5265
            log_info("preparing file-format header expression: %s",
4,641✔
5266
                     stmt_str.c_str());
5267
            auto retcode = sqlite3_prepare_v2(hexprs->e_db.in(),
9,282✔
5268
                                              stmt_str.c_str(),
5269
                                              stmt_str.size(),
4,641✔
5270
                                              che.che_stmt.out(),
5271
                                              nullptr);
5272
            if (retcode != SQLITE_OK) {
4,641✔
5273
                auto sql_al = attr_line_t(hpair.second)
2✔
5274
                                  .with_attr_for_all(SA_PREFORMATTED.value())
2✔
5275
                                  .with_attr_for_all(
1✔
5276
                                      VC_ROLE.value(role_t::VCR_QUOTED_CODE))
2✔
5277
                                  .move();
1✔
5278
                readline_sql_highlighter(
1✔
5279
                    sql_al, lnav::sql::dialect::sqlite, std::nullopt);
5280
                intern_string_t watch_expr_path = intern_string::lookup(
5281
                    fmt::format(FMT_STRING("/{}/converter/header/expr/{}"),
3✔
5282
                                this->elf_name,
1✔
5283
                                hpair.first));
2✔
5284
                auto snippet = lnav::console::snippet::from(
5285
                    source_location(watch_expr_path), sql_al);
1✔
5286

5287
                auto um = lnav::console::user_message::error(
2✔
5288
                              "SQL expression is invalid")
5289
                              .with_reason(sqlite3_errmsg(hexprs->e_db.in()))
2✔
5290
                              .with_snippet(snippet)
1✔
5291
                              .move();
1✔
5292

5293
                errors.emplace_back(um);
1✔
5294
                continue;
1✔
5295
            }
1✔
5296

5297
            hexprs->e_header_exprs[this->elf_name][hpair.first]
4,640✔
5298
                = std::move(che);
9,280✔
5299
        }
4,642✔
5300

5301
        if (!this->elf_converter.c_header.h_exprs.he_exprs.empty()
69,322✔
5302
            && this->elf_converter.c_command.pp_value.empty())
69,322✔
5303
        {
5304
            auto um = lnav::console::user_message::error(
2✔
5305
                          "A command is required when a converter is defined")
5306
                          .with_help(
2✔
5307
                              "The converter command transforms the file "
5308
                              "into a format that can be consumed by lnav")
5309
                          .with_snippets(this->get_snippets())
2✔
5310
                          .move();
1✔
5311
            errors.emplace_back(um);
1✔
5312
        }
1✔
5313
    }
69,322✔
5314

5315
    if (this->elf_type == elf_type_t::ELF_TYPE_TABULAR) {
69,322✔
5316
        auto vd
5317
            = std::make_shared<value_def>(LOG_EXTRA_FIELDS_STR,
5318
                                          value_kind_t::VALUE_JSON,
×
5319
                                          logline_value_meta::external_column{},
×
5320
                                          this);
1,152✔
5321
        vd->vd_description
1,152✔
5322
            = "unknown fields in the file are stored in this JSON column";
1,152✔
5323
        this->elf_value_defs[LOG_EXTRA_FIELDS_STR] = vd;
1,152✔
5324
        this->elf_value_def_order.emplace_back(vd);
1,152✔
5325
    }
1,152✔
5326

5327
    for (auto& vd : this->elf_value_def_order) {
723,175✔
5328
        std::vector<std::string>::iterator act_iter;
653,853✔
5329

5330
        if (!vd->vd_internal
653,853✔
5331
            && log_vtab_impl::RESERVED_COLUMNS.count(
1,239,602✔
5332
                vd->vd_meta.lvm_name.to_string_fragment()))
1,239,602✔
5333
        {
5334
            auto um = lnav::console::user_message::error(
×
5335
                          attr_line_t("value name ")
1✔
5336
                              .append_quoted(lnav::roles::symbol(
2✔
5337
                                  fmt::format(FMT_STRING("/{}/value/{}"),
4✔
5338
                                              this->elf_name,
1✔
5339
                                              vd->vd_meta.lvm_name)))
1✔
5340
                              .append(" is reserved and cannot be used"))
1✔
5341
                          .with_reason(
2✔
5342
                              "lnav automatically defines several columns in "
5343
                              "the log virtual table")
5344
                          .with_snippets(this->get_snippets())
2✔
5345
                          .with_help("Choose another name")
2✔
5346
                          .move();
1✔
5347
            errors.emplace_back(um);
1✔
5348
        }
1✔
5349

5350
        vd->vd_meta.lvm_format = this;
653,853✔
5351
        if (!vd->vd_internal
653,853✔
5352
            && !vd->vd_meta.lvm_column.is<logline_value_meta::table_column>())
653,853✔
5353
        {
5354
            vd->vd_meta.lvm_column
212,947✔
5355
                = logline_value_meta::table_column{this->elf_column_count++};
212,947✔
5356
        }
5357

5358
        if (vd->vd_meta.lvm_kind == value_kind_t::VALUE_UNKNOWN) {
653,853✔
5359
            log_warning("no kind set for %s, assuming text",
×
5360
                        vd->vd_meta.lvm_name.c_str());
5361
            vd->vd_meta.lvm_kind = value_kind_t::VALUE_TEXT;
×
5362
        }
5363

5364
        if (this->elf_type == elf_type_t::ELF_TYPE_TEXT) {
653,853✔
5365
            std::set<std::string> available_captures;
394,260✔
5366

5367
            bool found_in_pattern = false;
394,260✔
5368
            for (const auto& pat : this->elf_patterns) {
660,600✔
5369
                if (pat.second->p_pcre.pp_value == nullptr) {
660,598✔
5370
                    continue;
×
5371
                }
5372

5373
                auto cap_index = pat.second->p_pcre.pp_value->name_index(
1,321,196✔
5374
                    vd->vd_meta.lvm_name.get());
660,598✔
5375
                if (cap_index >= 0) {
660,598✔
5376
                    found_in_pattern = true;
394,258✔
5377
                    break;
394,258✔
5378
                }
5379

5380
                for (auto named_cap :
266,340✔
5381
                     pat.second->p_pcre.pp_value->get_named_captures())
2,426,738✔
5382
                {
5383
                    available_captures.insert(named_cap.get_name().to_string());
1,894,058✔
5384
                }
5385
            }
5386
            if (!found_in_pattern) {
394,260✔
5387
                auto notes
5388
                    = attr_line_t("the following captures are available:\n  ")
2✔
5389
                          .join(available_captures,
2✔
5390
                                VC_ROLE.value(role_t::VCR_SYMBOL),
4✔
5391
                                ", ")
5392
                          .move();
2✔
5393
                errors.emplace_back(
2✔
5394
                    lnav::console::user_message::warning(
×
5395
                        attr_line_t("invalid value ")
2✔
5396
                            .append_quoted(lnav::roles::symbol(
4✔
5397
                                fmt::format(FMT_STRING("/{}/value/{}"),
8✔
5398
                                            this->elf_name,
2✔
5399
                                            vd->vd_meta.lvm_name.get()))))
4✔
5400
                        .with_reason(
4✔
5401
                            attr_line_t("no patterns have a capture named ")
4✔
5402
                                .append_quoted(vd->vd_meta.lvm_name.get()))
2✔
5403
                        .with_note(notes)
2✔
5404
                        .with_snippets(this->get_snippets())
4✔
5405
                        .with_help("values are populated from captures in "
5406
                                   "patterns, so at least one pattern must "
5407
                                   "have a capture with this value name"));
5408
            }
2✔
5409
        }
394,260✔
5410

5411
        for (act_iter = vd->vd_action_list.begin();
653,853✔
5412
             act_iter != vd->vd_action_list.end();
654,781✔
5413
             ++act_iter)
928✔
5414
        {
5415
            if (this->lf_action_defs.find(*act_iter)
928✔
5416
                == this->lf_action_defs.end())
1,856✔
5417
            {
5418
#if 0
5419
                errors.push_back("error:" + this->elf_name.to_string() + ":"
5420
                                 + vd->vd_meta.lvm_name.get()
5421
                                 + ": cannot find action -- " + (*act_iter));
5422
#endif
5423
            }
5424
        }
5425

5426
        vd->set_rewrite_src_name();
653,853✔
5427

5428
        for (auto& hd_pair : vd->vd_highlighter_patterns) {
659,645✔
5429
            auto& hd = hd_pair.second;
5,792✔
5430
            auto attrs = vc.to_attrs(hd.hd_base_style, errors);
5,792✔
5431

5432
            if (hd.hd_pattern.pp_value == nullptr) {
5,792✔
5433
                hd.hd_pattern.pp_value
5434
                    = lnav::pcre2pp::code::from_const(".*").to_shared();
5,568✔
5435
            }
5436
            if (hd.hd_pattern.pp_value != nullptr) {
5,792✔
5437
                this->lf_highlighters.emplace_back(hd.hd_pattern.pp_value);
5,792✔
5438
                auto& hl = this->lf_highlighters.back();
5,792✔
5439

5440
                hl.with_field(vd->vd_meta.lvm_name)
5,792✔
5441
                    .with_name(hd_pair.first.to_string())
11,584✔
5442
                    .with_attrs(attrs)
5,792✔
5443
                    .with_nestable(hd.hd_base_style.sc_nestable);
5,792✔
5444
                for (const auto& [cap_key, cap_style] : hd.hd_capture_styles) {
5,904✔
5445
                    auto cap_index = hl.h_regex->name_index(cap_key.c_str());
112✔
5446
                    if (cap_index < 0) {
112✔
5447
                        auto um
5448
                            = lnav::console::user_message::error(
×
5449
                                  attr_line_t(
×
5450
                                      "invalid highlight capture named ")
5451
                                      .append_quoted(cap_key)
×
5452
                                      .append(" in highlight ")
×
5453
                                      .append_quoted(fmt::format(
×
5454
                                          FMT_STRING("/{}/value/{}/highlights/"
×
5455
                                                     "{}/captures"),
5456
                                          this->elf_name,
×
5457
                                          vd->vd_meta.lvm_name,
×
5458
                                          hd_pair.first)))
×
5459
                                  .with_reason(
×
5460
                                      attr_line_t("pattern does not have a "
×
5461
                                                  "capture named ")
5462
                                          .append_quoted(cap_key));
×
5463
                        errors.emplace_back(um);
×
5464
                        continue;
×
5465
                    }
5466

5467
                    hl.h_capture_attrs[cap_index - 1]
112✔
5468
                        = vc.to_attrs(cap_style, errors);
224✔
5469
                }
5470
            }
5471
        }
5472
    }
5473

5474
    if (this->elf_type == elf_type_t::ELF_TYPE_JSON
69,322✔
5475
        || this->elf_type == elf_type_t::ELF_TYPE_TABULAR)
55,656✔
5476
    {
5477
        for (const auto& vd : this->elf_value_def_order) {
274,411✔
5478
            this->elf_value_def_frag_map[vd->vd_meta.lvm_name
259,593✔
5479
                                             .to_string_fragment()] = vd.get();
259,593✔
5480
        }
5481
    }
5482

5483
    for (const auto& td_pair : this->lf_tag_defs) {
72,221✔
5484
        const auto& td = td_pair.second;
2,899✔
5485

5486
        if (td->ftd_pattern.pp_value == nullptr
2,899✔
5487
            || td->ftd_pattern.pp_value->get_pattern().empty())
2,899✔
5488
        {
5489
            errors.emplace_back(
3✔
5490
                lnav::console::user_message::error(
×
5491
                    attr_line_t("invalid tag definition ")
6✔
5492
                        .append_quoted(lnav::roles::symbol(
6✔
5493
                            fmt::format(FMT_STRING("/{}/tags/{}"),
12✔
5494
                                        this->elf_name,
3✔
5495
                                        td_pair.first))))
3✔
5496
                    .with_reason(
6✔
5497
                        "tag definitions must have a non-empty pattern")
5498
                    .with_snippets(this->get_snippets()));
6✔
5499
        }
5500
    }
5501

5502
    if (this->elf_opid_field.empty()
69,322✔
5503
        && this->lf_opid_description_def->size() > 1)
69,322✔
5504
    {
5505
        errors.emplace_back(
1✔
5506
            lnav::console::user_message::error(
×
5507
                attr_line_t("too many opid descriptions")
2✔
5508
                    .append_quoted(lnav::roles::symbol(fmt::format(
2✔
5509
                        FMT_STRING("/{}/opid/description"), this->elf_name))))
4✔
5510
                .with_reason(attr_line_t("when no ")
2✔
5511
                                 .append("opid-field"_symbol)
1✔
5512
                                 .append(" is specified, only a single "
1✔
5513
                                         "description is supported"))
5514
                .with_snippets(this->get_snippets()));
2✔
5515
    }
5516

5517
    for (const auto& opid_desc_pair : *this->lf_opid_description_def) {
99,132✔
5518
        for (const auto& opid_desc : *opid_desc_pair.second.od_descriptors) {
70,868✔
5519
            auto iter = this->elf_value_defs.find(opid_desc.od_field.pp_value);
41,058✔
5520
            if (iter == this->elf_value_defs.end()) {
41,058✔
5521
                errors.emplace_back(
2✔
5522
                    lnav::console::user_message::error(
×
5523
                        attr_line_t("invalid opid description field ")
4✔
5524
                            .append_quoted(lnav::roles::symbol(
4✔
5525
                                opid_desc.od_field.pp_path.to_string())))
4✔
5526
                        .with_reason(
4✔
5527
                            attr_line_t("unknown value name ")
4✔
5528
                                .append_quoted(opid_desc.od_field.pp_value))
2✔
5529
                        .with_snippets(this->get_snippets()));
4✔
5530
            } else {
5531
                this->lf_desc_fields.insert(iter->first);
41,056✔
5532
                iter->second->vd_is_desc_field = true;
41,056✔
5533
            }
5534
        }
5535
    }
5536

5537
    for (const auto& subid_desc_pair : *this->lf_subid_description_def) {
70,250✔
5538
        for (const auto& subid_desc : *subid_desc_pair.second.od_descriptors) {
1,856✔
5539
            auto iter = this->elf_value_defs.find(subid_desc.od_field.pp_value);
928✔
5540
            if (iter == this->elf_value_defs.end()) {
928✔
5541
                errors.emplace_back(
×
5542
                    lnav::console::user_message::error(
×
5543
                        attr_line_t("invalid subid description field ")
×
5544
                            .append_quoted(lnav::roles::symbol(
×
5545
                                subid_desc.od_field.pp_path.to_string())))
×
5546
                        .with_reason(
×
5547
                            attr_line_t("unknown value name ")
×
5548
                                .append_quoted(subid_desc.od_field.pp_value))
×
5549
                        .with_snippets(this->get_snippets()));
×
5550
            } else {
5551
                this->lf_desc_fields.insert(iter->first);
928✔
5552
                iter->second->vd_is_desc_field = true;
928✔
5553
            }
5554
        }
5555
    }
5556

5557
    if (this->elf_type == elf_type_t::ELF_TYPE_TEXT
138,644✔
5558
        && this->elf_samples.empty())
69,322✔
5559
    {
5560
        errors.emplace_back(
3✔
5561
            lnav::console::user_message::error(
×
5562
                attr_line_t()
6✔
5563
                    .append_quoted(
3✔
5564
                        lnav::roles::symbol(this->elf_name.to_string()))
6✔
5565
                    .append(" is not a valid log format"))
3✔
5566
                .with_reason("log message samples must be included in a format "
6✔
5567
                             "definition")
5568
                .with_snippets(this->get_snippets()));
6✔
5569
    }
5570

5571
    for (const auto& pat : this->elf_pattern_order) {
194,469✔
5572
        if (this->elf_type != elf_type_t::ELF_TYPE_TEXT) {
125,147✔
5573
            continue;
1✔
5574
        }
5575
        if (pat->p_pcre.pp_value->name_index(this->lf_timestamp_field.get())
125,146✔
5576
            < 0)
125,146✔
5577
        {
5578
            attr_line_t notes;
1✔
5579
            bool first_note = true;
1✔
5580

5581
            if (pat->p_pcre.pp_value->get_capture_count() > 0) {
1✔
5582
                notes.append("the following captures are available:\n  ");
1✔
5583
            }
5584
            for (auto named_cap : pat->p_pcre.pp_value->get_named_captures()) {
4✔
5585
                if (!first_note) {
3✔
5586
                    notes.append(", ");
2✔
5587
                }
5588
                notes.append(
3✔
5589
                    lnav::roles::symbol(named_cap.get_name().to_string()));
6✔
5590
                first_note = false;
3✔
5591
            }
5592
            errors.emplace_back(
1✔
5593
                lnav::console::user_message::error(
×
5594
                    attr_line_t("invalid value for property ")
1✔
5595
                        .append_quoted(lnav::roles::symbol(
2✔
5596
                            fmt::format(FMT_STRING("/{}/timestamp-field"),
4✔
5597
                                        this->elf_name))))
1✔
5598
                    .with_reason(
2✔
5599
                        attr_line_t()
2✔
5600
                            .append_quoted(this->lf_timestamp_field)
1✔
5601
                            .append(" was not found in the pattern at ")
1✔
5602
                            .append(lnav::roles::symbol(pat->p_config_path)))
2✔
5603
                    .with_note(notes)
1✔
5604
                    .with_snippets(this->get_snippets()));
2✔
5605
        }
1✔
5606
    }
5607

5608
    for (size_t sample_index = 0; sample_index < this->elf_samples.size();
309,764✔
5609
         sample_index += 1)
240,442✔
5610
    {
5611
        auto& elf_sample = this->elf_samples[sample_index];
240,442✔
5612
        auto sample_lines
5613
            = string_fragment(elf_sample.s_line.pp_value).split_lines();
240,442✔
5614

5615
        if (this->test_line(elf_sample, errors).is<scan_match>()) {
240,442✔
5616
            for (const auto& pat_name : elf_sample.s_matched_regexes) {
477,170✔
5617
                this->elf_patterns[pat_name]->p_matched_samples.emplace(
236,729✔
5618
                    sample_index);
5619
            }
5620
        }
5621
    }
240,442✔
5622

5623
    if (!this->elf_samples.empty()) {
69,322✔
5624
        for (const auto& elf_sample : this->elf_samples) {
296,799✔
5625
            if (elf_sample.s_matched_regexes.size() <= 1) {
240,442✔
5626
                continue;
240,442✔
5627
            }
5628

5629
            errors.emplace_back(
×
5630
                lnav::console::user_message::warning(
×
5631
                    attr_line_t("invalid log format: ")
×
5632
                        .append_quoted(
×
5633
                            lnav::roles::symbol(this->elf_name.to_string())))
×
5634
                    .with_reason(
×
5635
                        attr_line_t(
×
5636
                            "sample is matched by more than one regex: ")
5637
                            .join(elf_sample.s_matched_regexes,
×
5638
                                  VC_ROLE.value(role_t::VCR_SYMBOL),
×
5639
                                  ", "))
5640
                    .with_snippet(lnav::console::snippet::from(
×
5641
                        elf_sample.s_line.pp_location,
5642
                        attr_line_t().append(lnav::roles::quoted_code(
×
5643
                            elf_sample.s_line.pp_value))))
×
5644
                    .with_help("log format regexes must match a single type "
5645
                               "of log message"));
5646
        }
5647

5648
        for (const auto& pat : this->elf_pattern_order) {
181,501✔
5649
            if (pat->p_matched_samples.empty()) {
125,144✔
5650
                errors.emplace_back(
2✔
5651
                    lnav::console::user_message::warning(
×
5652
                        attr_line_t("invalid pattern: ")
4✔
5653
                            .append_quoted(
2✔
5654
                                lnav::roles::symbol(pat->p_config_path)))
4✔
5655
                        .with_reason("pattern does not match any samples")
4✔
5656
                        .with_snippet(lnav::console::snippet::from(
6✔
5657
                            pat->p_pcre.pp_location, ""))
2✔
5658
                        .with_help(
5659
                            "every pattern should have at least one sample "
5660
                            "that it matches"));
5661
            }
5662
        }
5663
    }
5664

5665
    size_t value_def_index = 0;
69,322✔
5666
    for (auto& elf_value_def : this->elf_value_def_order) {
723,175✔
5667
        elf_value_def->vd_meta.lvm_values_index
653,853✔
5668
            = std::make_optional(value_def_index++);
653,853✔
5669

5670
        if (elf_value_def->vd_meta.lvm_foreign_key
653,853✔
5671
            || elf_value_def->vd_meta.lvm_identifier)
653,853✔
5672
        {
5673
            continue;
332,609✔
5674
        }
5675

5676
        switch (elf_value_def->vd_meta.lvm_kind) {
321,244✔
5677
            case value_kind_t::VALUE_INTEGER:
80,704✔
5678
            case value_kind_t::VALUE_FLOAT:
5679
                this->elf_numeric_value_defs.push_back(elf_value_def);
80,704✔
5680
                break;
80,704✔
5681
            default:
240,540✔
5682
                break;
240,540✔
5683
        }
5684
    }
5685

5686
    int format_index = 0;
69,322✔
5687
    for (auto iter = this->jlf_line_format.begin();
69,322✔
5688
         iter != this->jlf_line_format.end();
243,853✔
5689
         ++iter, format_index++)
174,531✔
5690
    {
5691
        static const intern_string_t ts
5692
            = intern_string::lookup("__timestamp__");
176,387✔
5693
        static const intern_string_t level_field
5694
            = intern_string::lookup("__level__");
176,387✔
5695
        static const intern_string_t duration_field
5696
            = intern_string::lookup("__duration__");
176,387✔
5697
        auto& jfe = *iter;
174,531✔
5698

5699
        if (startswith(jfe.jfe_value.pp_value.get(), "/")) {
174,531✔
5700
            jfe.jfe_value.pp_value
5701
                = intern_string::lookup(jfe.jfe_value.pp_value.get() + 1);
224✔
5702
        }
5703
        if (!jfe.jfe_ts_format.empty()) {
174,531✔
5704
            if (!jfe.jfe_value.pp_value.empty() && jfe.jfe_value.pp_value != ts)
1,040✔
5705
            {
5706
                log_warning(
112✔
5707
                    "%s:line-format[%d]:ignoring field '%s' since "
5708
                    "timestamp-format was used",
5709
                    this->elf_name.get(),
5710
                    format_index,
5711
                    jfe.jfe_value.pp_value.get());
5712
            }
5713
            jfe.jfe_value.pp_value = ts;
1,040✔
5714
        }
5715

5716
        switch (jfe.jfe_type) {
174,531✔
5717
            case json_log_field::VARIABLE: {
121,475✔
5718
                auto vd_iter
5719
                    = this->elf_value_defs.find(jfe.jfe_value.pp_value);
121,475✔
5720
                if (jfe.jfe_value.pp_value == ts) {
121,475✔
5721
                    this->elf_value_defs[this->lf_timestamp_field]
12,401✔
5722
                        ->vd_meta.lvm_hidden = true;
12,401✔
5723
                } else if (jfe.jfe_value.pp_value == level_field) {
109,074✔
5724
                    this->elf_value_defs[this->elf_level_field]
4,640✔
5725
                        ->vd_meta.lvm_hidden = true;
4,640✔
5726
                } else if (jfe.jfe_value.pp_value == duration_field) {
104,434✔
5727
                    if (!this->elf_duration_field.empty()) {
928✔
5728
                        this->elf_value_defs[this->elf_duration_field]
928✔
5729
                            ->vd_meta.lvm_hidden = true;
928✔
5730
                    }
5731
                } else if (vd_iter == this->elf_value_defs.end()) {
103,506✔
5732
                    errors.emplace_back(
2✔
5733
                        lnav::console::user_message::error(
×
5734
                            attr_line_t("invalid line format element ")
4✔
5735
                                .append_quoted(lnav::roles::symbol(fmt::format(
4✔
5736
                                    FMT_STRING("/{}/line-format/{}/field"),
6✔
5737
                                    this->elf_name,
2✔
5738
                                    format_index))))
5739
                            .with_reason(
4✔
5740
                                attr_line_t()
4✔
5741
                                    .append_quoted(jfe.jfe_value.pp_value)
2✔
5742
                                    .append(" is not a defined value"))
2✔
5743
                            .with_snippet(jfe.jfe_value.to_snippet()));
4✔
5744
                } else {
5745
                    vd_iter->second->vd_line_format_index = format_index;
103,504✔
5746
                    switch (vd_iter->second->vd_meta.lvm_kind) {
103,504✔
5747
                        case value_kind_t::VALUE_INTEGER:
18,784✔
5748
                        case value_kind_t::VALUE_FLOAT:
5749
                            if (jfe.jfe_align
18,784✔
5750
                                == json_format_element::align_t::NONE)
5751
                            {
5752
                                jfe.jfe_align
5753
                                    = json_format_element::align_t::RIGHT;
17,856✔
5754
                            }
5755
                            break;
18,784✔
5756
                        default:
84,720✔
5757
                            break;
84,720✔
5758
                    }
5759
                }
5760
                break;
121,475✔
5761
            }
5762
            case json_log_field::CONSTANT:
53,056✔
5763
                this->jlf_line_format_init_count
53,056✔
5764
                    += std::count(jfe.jfe_default_value.begin(),
53,056✔
5765
                                  jfe.jfe_default_value.end(),
5766
                                  '\n');
53,056✔
5767
                break;
53,056✔
5768
            default:
×
5769
                break;
×
5770
        }
5771
    }
5772

5773
    for (auto& hd_pair : this->elf_highlighter_patterns) {
72,556✔
5774
        auto& hd = hd_pair.second;
3,234✔
5775
        auto attrs = vc.to_attrs(hd.hd_base_style, errors);
3,234✔
5776
        if (hd.hd_pattern.pp_value != nullptr) {
3,234✔
5777
            this->lf_highlighters.emplace_back(hd.hd_pattern.pp_value);
3,232✔
5778
            this->lf_highlighters.back()
3,232✔
5779
                .with_name(hd_pair.first.to_string())
6,464✔
5780
                .with_attrs(attrs)
3,232✔
5781
                .with_nestable(hd.hd_base_style.sc_nestable);
3,232✔
5782
        }
5783
    }
5784

5785
    switch (this->elf_type) {
69,322✔
5786
        case elf_type_t::ELF_TYPE_JSON:
13,666✔
5787
            this->lf_max_unrecognized_lines = 50;
13,666✔
5788
            break;
13,666✔
5789
        default:
55,656✔
5790
            break;
55,656✔
5791
    }
5792
}
5793

5794
void
5795
external_log_format::register_vtabs(
60,269✔
5796
    log_vtab_manager* vtab_manager,
5797
    std::vector<lnav::console::user_message>& errors)
5798
{
5799
    for (auto& elf_search_table : this->elf_search_tables) {
71,652✔
5800
        if (elf_search_table.second.std_pattern.pp_value == nullptr) {
11,383✔
5801
            continue;
1✔
5802
        }
5803

5804
        auto lst = std::make_shared<log_search_table>(
5805
            elf_search_table.second.std_pattern.pp_value,
11,382✔
5806
            elf_search_table.first);
11,382✔
5807
        lst->lst_format = this;
11,382✔
5808
        lst->lst_log_path_glob = elf_search_table.second.std_glob;
11,382✔
5809
        if (elf_search_table.second.std_level != LEVEL_UNKNOWN) {
11,382✔
5810
            lst->lst_log_level = elf_search_table.second.std_level;
4,065✔
5811
        }
5812
        auto errmsg = vtab_manager->register_vtab(lst);
11,382✔
5813
        if (!errmsg.empty()) {
11,382✔
5814
#if 0
5815
            errors.push_back("error:" + this->elf_name.to_string() + ":"
5816
                             + search_iter->first.to_string()
5817
                             + ":unable to register table -- " + errmsg);
5818
#endif
5819
        }
5820
    }
11,382✔
5821
}
60,269✔
5822

5823
bool
5824
external_log_format::match_samples(const std::vector<sample_t>& samples) const
5,128,398✔
5825
{
5826
    for (const auto& sample_iter : samples) {
22,701,590✔
5827
        for (const auto& pat_iter : this->elf_pattern_order) {
47,506,969✔
5828
            auto& pat = *pat_iter;
29,933,777✔
5829

5830
            if (!pat.p_pcre.pp_value) {
29,933,777✔
5831
                continue;
×
5832
            }
5833

5834
            if (pat.p_pcre.pp_value
59,867,554✔
5835
                    ->find_in(sample_iter.s_line.pp_value, PCRE2_NO_UTF_CHECK)
59,867,554✔
5836
                    .ignore_error())
59,867,554✔
5837
            {
5838
                return true;
19,444✔
5839
            }
5840
        }
5841
    }
5842

5843
    return false;
5,108,954✔
5844
}
5845

5846
class external_log_table : public log_format_vtab_impl {
5847
public:
5848
    explicit external_log_table(std::shared_ptr<const log_format> elf)
60,269✔
5849
        : log_format_vtab_impl(elf),
60,269✔
5850
          elt_format(dynamic_cast<const external_log_format*>(elf.get()))
60,269✔
5851
    {
5852
    }
60,269✔
5853

5854
    void get_columns(std::vector<vtab_column>& cols) const override
60,725✔
5855
    {
5856
        const auto& elf = this->elt_format;
60,725✔
5857

5858
        cols.resize(elf->elf_column_count);
60,725✔
5859
        for (const auto& vd : elf->elf_value_def_order) {
635,732✔
5860
            auto type_pair = logline_value_to_sqlite_type(vd->vd_meta.lvm_kind);
575,007✔
5861

5862
            if (!vd->vd_meta.lvm_column.is<logline_value_meta::table_column>())
575,007✔
5863
            {
5864
                continue;
58,905✔
5865
            }
5866

5867
            auto col
5868
                = vd->vd_meta.lvm_column.get<logline_value_meta::table_column>()
516,102✔
5869
                      .value;
516,102✔
5870
            require(0 <= col && col < elf->elf_column_count);
516,102✔
5871

5872
            cols[col].vc_name = vd->vd_meta.lvm_name.get();
516,102✔
5873
            cols[col].vc_type = type_pair.first;
516,102✔
5874
            cols[col].vc_subtype = type_pair.second;
516,102✔
5875
            cols[col].vc_collator = vd->vd_collate;
516,102✔
5876
            cols[col].vc_comment = vd->vd_description;
516,102✔
5877
        }
5878
    }
60,725✔
5879

5880
    void get_foreign_keys(
45,617✔
5881
        std::unordered_set<std::string>& keys_inout) const override
5882
    {
5883
        log_vtab_impl::get_foreign_keys(keys_inout);
45,617✔
5884

5885
        for (const auto& elf_value_def : this->elt_format->elf_value_defs) {
679,729✔
5886
            if (elf_value_def.second->vd_meta.lvm_foreign_key
634,112✔
5887
                || elf_value_def.second->vd_meta.lvm_identifier)
634,112✔
5888
            {
5889
                keys_inout.emplace(elf_value_def.first.to_string());
227,561✔
5890
            }
5891
        }
5892
    }
45,617✔
5893

5894
    bool next(log_cursor& lc, logfile_sub_source& lss) override
3,590✔
5895
    {
5896
        if (lc.is_eof()) {
3,590✔
5897
            return true;
×
5898
        }
5899

5900
        content_line_t cl(lss.at(lc.lc_curr_line));
3,590✔
5901
        auto* lf = lss.find_file_ptr(cl);
3,590✔
5902
        auto lf_iter = lf->begin() + cl;
3,590✔
5903
        if (lf_iter->is_continued()) {
3,590✔
5904
            return false;
×
5905
        }
5906

5907
        if (lf->get_format_name() == this->lfvi_format->get_name()) {
3,590✔
5908
            return true;
3,584✔
5909
        }
5910

5911
        return false;
6✔
5912
    }
5913

5914
    void extract(logfile* lf,
3,420✔
5915
                 uint64_t line_number,
5916
                 string_attrs_t& sa,
5917
                 logline_value_vector& values) override
5918
    {
5919
        auto format = lf->get_format();
3,420✔
5920

5921
        sa.clear();
3,420✔
5922
        format->annotate(lf, line_number, sa, values);
3,420✔
5923
        if (this->elt_format->elf_type
3,420✔
5924
                == external_log_format::elf_type_t::ELF_TYPE_TABULAR
5925
            && this->elt_format->tlf_extra_count > 0)
16✔
5926
        {
5927
            auto iter = this->elt_format->elf_value_defs.find(
6✔
5928
                log_format::LOG_EXTRA_FIELDS_STR);
5929
            require(iter != this->elt_format->elf_value_defs.end());
6✔
5930
            yajlpp_gen gen;
6✔
5931

5932
            {
5933
                yajlpp_map root(gen);
6✔
5934

5935
                for (const auto& lv : values.lvv_values) {
52✔
5936
                    if (lv.lv_meta.lvm_struct_name
46✔
5937
                        != log_format::LOG_EXTRA_FIELDS_STR)
46✔
5938
                    {
5939
                        continue;
28✔
5940
                    }
5941

5942
                    root.gen(lv.lv_meta.lvm_name);
18✔
5943
                    switch (lv.lv_meta.lvm_kind) {
18✔
5944
                        case value_kind_t::VALUE_NULL:
1✔
5945
                            root.gen();
1✔
5946
                            break;
1✔
5947
                        case value_kind_t::VALUE_INTEGER:
4✔
5948
                            root.gen(lv.lv_value.i);
4✔
5949
                            break;
4✔
5950
                        case value_kind_t::VALUE_FLOAT:
4✔
5951
                            root.gen(lv.lv_value.d);
4✔
5952
                            break;
4✔
5953
                        default:
9✔
5954
                            root.gen(lv.text_value_fragment());
9✔
5955
                            break;
9✔
5956
                    }
5957
                }
5958
            }
6✔
5959
            auto owned_sf
5960
                = gen.to_string_fragment().to_owned(values.lvv_allocator);
6✔
5961
            values.lvv_values.emplace_back(iter->second->vd_meta, owned_sf);
6✔
5962
        }
6✔
5963
    }
3,420✔
5964

5965
    const external_log_format* elt_format;
5966
    line_range elt_container_body;
5967
};
5968

5969
std::shared_ptr<log_vtab_impl>
5970
external_log_format::get_vtab_impl() const
60,269✔
5971
{
5972
    return std::make_shared<external_log_table>(this->shared_from_this());
60,269✔
5973
}
5974

5975
std::shared_ptr<log_format>
5976
external_log_format::specialized(int fmt_lock)
615✔
5977
{
5978
    auto retval = std::make_shared<external_log_format>(*this);
615✔
5979

5980
    retval->lf_specialized = true;
615✔
5981
    if (this->elf_type == elf_type_t::ELF_TYPE_JSON) {
615✔
5982
        this->jlf_parse_context
5983
            = std::make_shared<yajlpp_parse_context>(this->elf_name);
54✔
5984
        this->jlf_yajl_handle.reset(
54✔
5985
            yajl_alloc(&this->jlf_parse_context->ypc_callbacks,
54✔
5986
                       nullptr,
5987
                       this->jlf_parse_context.get()),
54✔
5988
            yajl_handle_deleter());
5989
        yajl_config(this->jlf_yajl_handle.get(), yajl_dont_validate_strings, 1);
54✔
5990
        this->jlf_attr_line.al_string.reserve(16 * 1024);
54✔
5991
    }
5992

5993
    this->elf_specialized_value_defs_state = *this->elf_value_defs_state;
615✔
5994

5995
    return retval;
1,230✔
5996
}
615✔
5997

5998
/**
 * Test a file name against this format's file name pattern.
 *
 * @param filename The name to check.
 * @return name_matched when the format has no file name pattern or the
 *   pattern is found in the name; otherwise a name_mismatched holding the
 *   result of the partial match and the pattern text.
 */
log_format::match_name_result
external_log_format::match_name(const std::string& filename)
{
    const auto& name_re = this->elf_filename_pcre.pp_value;

    if (name_re == nullptr
        || name_re->find_in(filename).ignore_error().has_value())
    {
        return name_matched{};
    }

    return name_mismatched{
        name_re->match_partial(filename),
        name_re->get_pattern(),
    };
}
6017

6018
auto
6019
external_log_format::value_line_count(scan_batch_context& sbc,
321,133✔
6020
                                      const value_def* vd,
6021
                                      bool top_level,
6022
                                      std::optional<double> val,
6023
                                      const unsigned char* str,
6024
                                      ssize_t len,
6025
                                      yajl_string_props_t* props)
6026
    -> value_line_count_result
6027
{
6028
    value_line_count_result retval;
321,133✔
6029

6030
    if (str != nullptr && props != nullptr && !val) {
321,133✔
6031
        auto frag = string_fragment::from_bytes(str, len);
234,785✔
6032
        while (frag.endswith("\n")) {
235,837✔
6033
            frag.pop_back();
1,052✔
6034
            props->line_feeds -= 1;
1,052✔
6035
        }
6036
        retval.vlcr_has_ansi |= props->has_ansi;
234,785✔
6037
        retval.vlcr_count += props->line_feeds;
234,785✔
6038
    }
6039

6040
    if (vd == nullptr) {
321,133✔
6041
        if (this->jlf_hide_extra || !top_level) {
294,007✔
6042
            retval.vlcr_count = 0;
272,389✔
6043
        }
6044

6045
        return retval;
294,007✔
6046
    }
6047

6048
    if (vd->vd_meta.lvm_values_index) {
27,126✔
6049
        auto& lvs = sbc.sbc_value_stats[vd->vd_meta.lvm_values_index.value()];
8,566✔
6050
        if (len > lvs.lvs_width) {
8,566✔
6051
            lvs.lvs_width = len;
4,452✔
6052
        }
6053
        if (val) {
8,566✔
6054
            lvs.add_value(val.value());
247✔
6055
        } else if (vd->vd_meta.lvm_identifier && str != nullptr && len > 0) {
8,319✔
6056
            // Identifier fields parsed as strings (no numeric `val`)
6057
            // contribute to the column's distinct-count estimate
6058
            // instead, mirroring the regex/tabular paths.
6059
            lvs.add_text(string_fragment::from_bytes(str, len));
2,225✔
6060
        }
6061
    }
6062

6063
    if (vd->vd_line_format_index) {
27,126✔
6064
        retval.vlcr_line_format_count += 1;
4,227✔
6065
        retval.vlcr_count -= 1;
4,227✔
6066
        retval.vlcr_line_format_index = vd->vd_line_format_index;
4,227✔
6067
    }
6068
    if (vd->vd_meta.is_hidden()) {
27,126✔
6069
        retval.vlcr_count = 0;
3,973✔
6070
        return retval;
3,973✔
6071
    }
6072

6073
    return retval;
23,153✔
6074
}
6075

6076
/**
 * Translate the text of a level field into a log level.
 *
 * @param sf The level text; an invalid fragment yields LEVEL_INFO.
 * @param sbc Optional batch context whose level cache is consulted first
 *   and updated with the result when the text is short enough to be a key.
 * @return The level from the cache, from the first matching level pattern,
 *   or from string2level() when the format has no level patterns.
 *   LEVEL_INFO is returned when patterns exist but none of them match.
 */
log_level_t
external_log_format::convert_level(string_fragment sf,
                                   scan_batch_context* sbc) const
{
    if (!sf.is_valid()) {
        return LEVEL_INFO;
    }

    if (sbc != nullptr) {
        auto cached = sbc->sbc_level_cache.lookup(sf);
        if (cached.has_value()) {
            return static_cast<log_level_t>(cached.value());
        }
    }

    auto retval = LEVEL_INFO;
    if (this->elf_level_patterns.empty()) {
        retval = string2level(sf.data(), sf.length());
    } else {
        for (const auto& [level, level_pat] : this->elf_level_patterns) {
            const auto found = level_pat.lp_pcre.pp_value
                                   ->find_in(sf, PCRE2_NO_UTF_CHECK)
                                   .ignore_error()
                                   .has_value();
            if (found) {
                retval = level;
                break;
            }
        }
    }

    if (sbc != nullptr
        && sf.length() <= lnav::small_string_map::MAX_KEY_SIZE)
    {
        sbc->sbc_level_cache.insert(sf, retval);
    }

    return retval;
}
6114

6115
/**
 * Build the metadata for a value identified by its field name.
 *
 * If the field has a value definition, a copy of that definition's
 * metadata is returned with its kind replaced by @a kind.  Otherwise, a
 * fresh external-column metadata object is created whose hidden flag
 * follows jlf_hide_extra.
 *
 * @param field_name The name of the field to look up.
 * @param kind The kind to record in the returned metadata.
 * @return The metadata describing the field.
 */
logline_value_meta
external_log_format::get_value_meta(intern_string_t field_name,
                                    value_kind_t kind) const
{
    const auto def_iter = this->elf_value_defs.find(field_name);

    if (def_iter != this->elf_value_defs.end()) {
        auto known_meta = def_iter->second->vd_meta;

        known_meta.lvm_kind = kind;
        return known_meta;
    }

    // The field is not defined by the format.
    auto extra_meta
        = logline_value_meta(field_name,
                             kind,
                             logline_value_meta::external_column{},
                             const_cast<external_log_format*>(this));

    extra_meta.lvm_hidden = this->jlf_hide_extra;
    return extra_meta;
}
2,056✔
6136

6137
/**
 * Build the metadata for a value encountered while parsing JSON.
 *
 * Without a value definition, the current parser path is used as the
 * name of a new external-column metadata object whose hidden flag
 * follows jlf_hide_extra.  With a definition, a copy of its metadata is
 * returned; the kind is replaced by @a kind unless the definition is a
 * timestamp.
 *
 * @param ypc The parse context that supplies the current path.
 * @param vd The value definition for the field, or nullptr if unknown.
 * @param kind The kind observed for the value.
 * @return The metadata describing the value.
 */
logline_value_meta
external_log_format::get_value_meta(yajlpp_parse_context* ypc,
                                    const value_def* vd,
                                    value_kind_t kind)
{
    if (vd != nullptr) {
        auto def_meta = vd->vd_meta;

        // Timestamp definitions keep their declared kind.
        if (vd->vd_meta.lvm_kind != value_kind_t::VALUE_TIMESTAMP) {
            def_meta.lvm_kind = kind;
        }
        return def_meta;
    }

    auto extra_meta = logline_value_meta(
        ypc->get_path(), kind, logline_value_meta::external_column{}, this);

    extra_meta.lvm_hidden = this->jlf_hide_extra;
    return extra_meta;
}
4,791✔
6161

6162
void
6163
external_log_format::json_append(const log_format_file_state& lffs,
3,671✔
6164
                                 const json_format_element& jfe,
6165
                                 const value_def* vd,
6166
                                 const string_fragment& sf)
6167
{
6168
    if (jfe.jfe_align == json_format_element::align_t::RIGHT) {
3,671✔
6169
        auto sf_width = sf.column_width();
195✔
6170
        if (sf_width < jfe.jfe_min_width) {
195✔
6171
            this->json_append_to_cache(jfe.jfe_min_width - sf_width);
8✔
6172
        } else if (jfe.jfe_auto_width && vd != nullptr
24✔
6173
                   && sf_width
211✔
6174
                       < lffs.lffs_value_stats[vd->vd_meta.lvm_values_index
24✔
6175
                                                   .value()]
24✔
6176
                             .lvs_width)
24✔
6177
        {
6178
            this->json_append_to_cache(
8✔
6179
                lffs.lffs_value_stats[vd->vd_meta.lvm_values_index.value()]
8✔
6180
                    .lvs_width
8✔
6181
                - sf_width);
8✔
6182
        }
6183
    }
6184
    this->json_append_to_cache(sf.data(), sf.length());
3,671✔
6185
    if ((jfe.jfe_align == json_format_element::align_t::LEFT
3,671✔
6186
         || jfe.jfe_align == json_format_element::align_t::NONE)
3,642✔
6187
        && (jfe.jfe_min_width > 0 || jfe.jfe_auto_width))
3,476✔
6188
    {
6189
        auto sf_width = sf.column_width();
582✔
6190
        if (sf_width < jfe.jfe_min_width) {
582✔
6191
            this->json_append_to_cache(jfe.jfe_min_width - sf_width);
155✔
6192
        } else if (jfe.jfe_auto_width && vd != nullptr
372✔
6193
                   && sf_width
799✔
6194
                       < lffs.lffs_value_stats[vd->vd_meta.lvm_values_index
305✔
6195
                                                   .value()]
305✔
6196
                             .lvs_width)
305✔
6197
        {
6198
            this->json_append_to_cache(
148✔
6199
                lffs.lffs_value_stats[vd->vd_meta.lvm_values_index.value()]
148✔
6200
                    .lvs_width
148✔
6201
                - sf_width);
148✔
6202
        }
6203
    }
6204
}
3,671✔
6205

6206
/**
 * Get the name of the pattern that is locked in for a line.
 *
 * Formats that are not of the text type have no regex patterns and
 * report the fixed name "structured".
 *
 * @param pl The pattern locks for the file.
 * @param line_number The line of interest.
 * @return The name of the pattern, or "structured".
 */
intern_string_t
external_log_format::get_pattern_name(const pattern_locks& pl,
                                      uint64_t line_number) const
{
    if (this->elf_type == elf_type_t::ELF_TYPE_TEXT) {
        // NOTE(review): pattern_index_for_line() returns -1 when no lock
        // is found and the result is used as an index without a check.
        const auto lock_index = pl.pattern_index_for_line(line_number);

        return this->elf_pattern_order[lock_index]->p_name;
    }

    static auto structured = intern_string::lookup("structured");

    return structured;
}
6218

6219
std::string
6220
log_format::get_pattern_path(const pattern_locks& pl,
×
6221
                             uint64_t line_number) const
6222
{
6223
    auto pat_index = pl.pattern_index_for_line(line_number);
×
6224
    return fmt::format(FMT_STRING("builtin ({})"), pat_index);
×
6225
}
6226

6227
/**
 * Get a name for the pattern that is locked in for a line.
 *
 * This base implementation formats the locked pattern index as
 * "builtin (N)" and interns the result.
 *
 * @param pl The pattern locks for the file.
 * @param line_number The line of interest.
 * @return The interned name.
 */
intern_string_t
log_format::get_pattern_name(const pattern_locks& pl,
                             uint64_t line_number) const
{
    char pat_str[128];

    auto pat_index = pl.pattern_index_for_line(line_number);
    auto to_n_res = fmt::format_to_n(
        pat_str, sizeof(pat_str) - 1, FMT_STRING("builtin ({})"), pat_index);
    // Terminate at the end of what was actually written.  The `size`
    // member is the untruncated length and is not a safe index if the
    // output was ever cut short.
    *to_n_res.out = '\0';
    return intern_string::lookup(pat_str);
}
6239

6240
std::shared_ptr<log_format>
6241
log_format::find_root_format(const char* name)
1,644✔
6242
{
6243
    auto& fmts = get_root_formats();
1,644✔
6244
    for (auto& lf : fmts) {
68,617✔
6245
        if (lf->get_name() == name) {
68,617✔
6246
            return lf;
1,644✔
6247
        }
6248
    }
6249
    return nullptr;
×
6250
}
6251

6252
/**
 * Build the broken-down time used to display a line's timestamp.
 *
 * The result starts from the line's stored timeval, with nanoseconds
 * derived from its microseconds.  If the format's timestamp flags have
 * ETF_NANOS_SET, the original timestamp text is scanned again and, on
 * success, the scanned nanoseconds are used instead.
 *
 * @param ll The line whose time should be displayed.
 * @param sf The timestamp text from the line.
 * @return The time with flags and GMT offset filled in from this format.
 */
exttm
log_format::tm_for_display(logfile::iterator ll, string_fragment sf)
{
    auto line_tv = ll->get_timeval();
    exttm retval;

    retval.et_nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(
                         std::chrono::microseconds{line_tv.tv_usec})
                         .count();

    const bool has_nanos = (this->lf_timestamp_flags & ETF_NANOS_SET) != 0;
    if (has_nanos) {
        timeval scanned_tv;
        exttm scanned_tm;

        if (this->lf_date_time.scan(sf.data(),
                                    sf.length(),
                                    this->get_timestamp_formats(),
                                    &scanned_tm,
                                    scanned_tv,
                                    false))
        {
            line_tv.tv_usec = scanned_tv.tv_usec;
            retval.et_nsec = scanned_tm.et_nsec;
        }
    }

    gmtime_r(&line_tv.tv_sec, &retval.et_tm);
    retval.et_flags = this->lf_timestamp_flags;

    const bool zone_set = (this->lf_timestamp_flags & ETF_ZONE_SET) != 0;
    if (zone_set && this->lf_date_time.dts_zoned_to_local) {
        retval.et_flags &= ~ETF_Z_IS_UTC;
    }
    retval.et_gmtoff = this->lf_date_time.dts_local_offset_cache;

    return retval;
}
6286

6287
/**
 * Record a pattern index together with a line number.
 *
 * @param pfl_line The line number stored in the entry.
 * @param pfl_pat_index The pattern index stored in the entry.
 */
pattern_for_lines::pattern_for_lines(uint32_t pfl_line,
                                     uint32_t pfl_pat_index)
    : pfl_line(pfl_line),
      pfl_pat_index(pfl_pat_index)
{
}
3,940✔
6291

6292
void
6293
logline_value_stats::merge(const logline_value_stats& other)
19,154✔
6294
{
6295
    if (other.lvs_width > this->lvs_width) {
19,154✔
6296
        this->lvs_width = other.lvs_width;
4,969✔
6297
    }
6298

6299
    // Distinct-count merge runs before the lvs_count == 0 short-circuit
6300
    // because a text-only column has count == 0 but may still carry an
6301
    // HLL whose registers we need to fold in.
6302
    this->lvs_text_count += other.lvs_text_count;
19,154✔
6303
    if (other.lvs_distinct) {
19,154✔
6304
        if (this->lvs_distinct) {
4,242✔
6305
            this->lvs_distinct->merge(other.lvs_distinct.value());
1,984✔
6306
        } else {
6307
            this->lvs_distinct = other.lvs_distinct;
2,258✔
6308
        }
6309
    }
6310

6311
    if (other.lvs_count == 0) {
19,154✔
6312
        return;
17,534✔
6313
    }
6314

6315
    require(other.lvs_min_value <= other.lvs_max_value);
1,620✔
6316

6317
    if (other.lvs_min_value < this->lvs_min_value) {
1,620✔
6318
        this->lvs_min_value = other.lvs_min_value;
1,162✔
6319
    }
6320
    if (other.lvs_max_value > this->lvs_max_value) {
1,620✔
6321
        this->lvs_max_value = other.lvs_max_value;
1,536✔
6322
    }
6323
    this->lvs_count += other.lvs_count;
1,620✔
6324
    this->lvs_total += other.lvs_total;
1,620✔
6325
    this->lvs_tdigest.insert(other.lvs_tdigest);
1,620✔
6326
    ensure(this->lvs_count >= 0);
1,620✔
6327
    ensure(this->lvs_min_value <= this->lvs_max_value);
1,620✔
6328
}
6329

6330
void
6331
logline_value_stats::add_value(double value)
33,419✔
6332
{
6333
    if (value < this->lvs_min_value) {
33,419✔
6334
        this->lvs_min_value = value;
1,889✔
6335
    }
6336
    if (value > this->lvs_max_value) {
33,419✔
6337
        this->lvs_max_value = value;
2,825✔
6338
    }
6339
    this->lvs_count += 1;
33,419✔
6340
    this->lvs_total += value;
33,419✔
6341
    this->lvs_tdigest.insert(value);
33,419✔
6342
}
33,419✔
6343

6344
void
6345
logline_value_stats::add_text(string_fragment sf)
32,023✔
6346
{
6347
    if (!this->lvs_distinct) {
32,023✔
6348
        this->lvs_distinct.emplace(12);
5,397✔
6349
    }
6350
    this->lvs_distinct->add(sf.data(), static_cast<uint32_t>(sf.length()));
32,023✔
6351
    this->lvs_text_count += 1;
32,023✔
6352
}
32,023✔
6353

6354
std::optional<double>
6355
logline_value_stats::distinct_estimate() const
49✔
6356
{
6357
    if (!this->lvs_distinct) {
49✔
6358
        return std::nullopt;
13✔
6359
    }
6360
    return this->lvs_distinct->estimate();
36✔
6361
}
6362

6363
void
6364
logline_value_stats::finalize()
37,933✔
6365
{
6366
    this->lvs_tdigest.merge();
37,933✔
6367
}
37,933✔
6368

6369
std::vector<logline_value_meta>
6370
external_log_format::get_value_metadata() const
11,384✔
6371
{
6372
    std::vector<logline_value_meta> retval;
11,384✔
6373

6374
    for (const auto& vd : this->elf_value_def_order) {
119,600✔
6375
        retval.emplace_back(vd->vd_meta);
108,216✔
6376
    }
6377

6378
    return retval;
11,384✔
6379
}
×
6380

6381
std::optional<size_t>
6382
external_log_format::stats_index_for_value(const intern_string_t& name) const
127✔
6383
{
6384
    const auto iter = this->elf_value_defs.find(name);
127✔
6385
    if (iter != this->elf_value_defs.end()
127✔
6386
        && iter->second->vd_meta.lvm_values_index)
127✔
6387
    {
6388
        return iter->second->vd_meta.lvm_values_index.value();
127✔
6389
    }
6390

6391
    return std::nullopt;
×
6392
}
6393

6394
std::string
6395
external_log_format::get_pattern_regex(const pattern_locks& pl,
17✔
6396
                                       uint64_t line_number) const
6397
{
6398
    if (this->elf_type != elf_type_t::ELF_TYPE_TEXT) {
17✔
6399
        return "";
2✔
6400
    }
6401
    auto pat_index = pl.pattern_index_for_line(line_number);
16✔
6402
    return this->elf_pattern_order[pat_index]->p_pcre.pp_value->get_pattern();
16✔
6403
}
6404

6405
bool
6406
external_log_format::hide_field(const intern_string_t field_name, bool val)
11,377✔
6407
{
6408
    const auto vd_iter = this->elf_value_defs.find(field_name);
11,377✔
6409
    if (vd_iter == this->elf_value_defs.end()) {
11,377✔
6410
        log_warning("field to hide not found: %s.%s",
1✔
6411
                    this->elf_name.c_str(),
6412
                    field_name.c_str());
6413
        return false;
1✔
6414
    }
6415

6416
    vd_iter->second->vd_meta.lvm_user_hidden = val;
11,376✔
6417
    if (!this->jlf_line_format.empty()) {
11,376✔
6418
        bool found = false;
3,393✔
6419

6420
        if (!field_name.to_string_fragment().find('#')) {
3,393✔
6421
            for (const auto& jfe : this->jlf_line_format) {
41,326✔
6422
                if (jfe.jfe_value.pp_value == field_name) {
39,166✔
6423
                    log_debug(
1,187✔
6424
                        "hide-field not triggering rebuild since it is in "
6425
                        "line-format: %s.%s",
6426
                        this->elf_name.c_str(),
6427
                        field_name.c_str());
6428
                    found = true;
1,187✔
6429
                    break;
1,187✔
6430
                }
6431
            }
6432
        }
6433
        if (!found) {
3,393✔
6434
            log_info("format field %s.%s changed, rebuilding",
2,206✔
6435
                     this->elf_name.get(),
6436
                     field_name.get());
6437
            this->elf_value_defs_state->vds_generation += 1;
2,206✔
6438
        }
6439
    }
6440
    return true;
11,376✔
6441
}
6442

6443
bool
6444
external_log_format::format_changed()
3,286✔
6445
{
6446
    if (this->elf_specialized_value_defs_state.vds_generation
6,572✔
6447
        != this->elf_value_defs_state->vds_generation)
3,286✔
6448
    {
6449
        this->elf_specialized_value_defs_state = *this->elf_value_defs_state;
11✔
6450
        this->jlf_cached_offset = -1;
11✔
6451
        return true;
11✔
6452
    }
6453

6454
    return false;
3,275✔
6455
}
6456

6457
bool
6458
format_tag_def::path_restriction::matches(const char* fn) const
9✔
6459
{
6460
    return fnmatch(this->p_glob.c_str(), fn, 0) == 0;
9✔
6461
}
6462

6463
bool
6464
format_partition_def::path_restriction::matches(const char* fn) const
9✔
6465
{
6466
    return fnmatch(this->p_glob.c_str(), fn, 0) == 0;
9✔
6467
}
6468

6469
std::optional<pattern_for_lines>
6470
pattern_locks::get_pattern_for_line(uint64_t line_number) const
12,276✔
6471
{
6472
    if (this->pl_lines.empty()) {
12,276✔
6473
        return std::nullopt;
2,557✔
6474
    }
6475

6476
    auto iter
6477
        = std::lower_bound(this->pl_lines.cbegin(),
9,719✔
6478
                           this->pl_lines.cend(),
6479
                           line_number,
6480
                           [](const pattern_for_lines& pfl, uint32_t line) {
9,829✔
6481
                               return pfl.pfl_line < line;
9,829✔
6482
                           });
6483

6484
    if (iter == this->pl_lines.end() || iter->pfl_line != line_number) {
9,719✔
6485
        --iter;
8,697✔
6486
    }
6487

6488
    return *iter;
9,719✔
6489
}
6490

6491
int
6492
pattern_locks::pattern_index_for_line(uint64_t line_number) const
7,367✔
6493
{
6494
    auto pat_opt = this->get_pattern_for_line(line_number);
7,367✔
6495
    if (pat_opt.has_value()) {
7,367✔
6496
        return pat_opt.value().pfl_pat_index;
7,344✔
6497
    }
6498

6499
    return -1;
23✔
6500
}
6501

6502
/* XXX */
6503
#include "log_format_impls.cc"
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc