• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 25348852825-3024

04 May 2026 11:18PM UTC coverage: 69.963% (+0.7%) from 69.226%
25348852825-3024

push

github

tstack
[ui] horizontal scroll should work on columns

Related to #1685

7 of 141 new or added lines in 5 files covered. (4.96%)

7760 existing lines in 84 files now uncovered.

57014 of 81492 relevant lines covered (69.96%)

622491.44 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.9
/src/log_format_impls.cc
1
/**
2
 * Copyright (c) 2007-2017, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file log_format_impls.cc
30
 */
31

32
#include <algorithm>
33
#include <chrono>
34
#include <memory>
35
#include <utility>
36

37
#include "log_format.hh"
38

39
#include <stdio.h>
40

41
#include "base/humanize.hh"
42
#include "base/injector.bind.hh"
43
#include "base/separated_string.hh"
44
#include "base/string_attr_type.hh"
45
#include "config.h"
46
#include "formats/logfmt/logfmt.parser.hh"
47
#include "log_vtab_impl.hh"
48
#include "ptimec.hh"
49
#include "scn/scan.h"
50
#include "sql_util.hh"
51
#include "yajlpp/yajlpp.hh"
52

53
using std::string_literals::operator""s;
54

55
class piper_log_format : public log_format {
56
public:
57
    const intern_string_t get_name() const override
18,162✔
58
    {
59
        static const intern_string_t RETVAL
60
            = intern_string::lookup("lnav_piper_log");
19,984✔
61

62
        return RETVAL;
18,162✔
63
    }
64

65
    scan_result_t scan(logfile& lf,
14,681✔
66
                       std::vector<logline>& dst,
67
                       const line_info& li,
68
                       shared_buffer_ref& sbr,
69
                       scan_batch_context& sbc) override
70
    {
71
        if (lf.has_line_metadata()
14,681✔
72
            && lf.get_text_format() == text_format_t::TF_LOG)
14,681✔
73
        {
74
            auto& ll = dst.back();
293✔
75
            ll.set_time(li.li_timestamp);
293✔
76
            ll.set_level(li.li_level);
293✔
77
            return scan_match{1};
293✔
78
        }
79

80
        return scan_no_match{"not a piper capture"};
14,388✔
81
    }
82

83
    static constexpr int TIMESTAMP_SIZE = 28;
84

85
    void annotate(logfile* lf,
59✔
86
                  uint64_t line_number,
87
                  string_attrs_t& sa,
88
                  logline_value_vector& values) const override
89
    {
90
        auto lr = line_range{0, TIMESTAMP_SIZE};
59✔
91
        sa.emplace_back(lr, L_TIMESTAMP.value());
59✔
92
        log_format::annotate(lf, line_number, sa, values);
59✔
93
    }
59✔
94

95
    void get_subline(const log_format_file_state& lffs,
385✔
96
                     const logline& ll,
97
                     shared_buffer_ref& sbr,
98
                     subline_options opts) override
99
    {
100
        this->plf_cached_line.resize(TIMESTAMP_SIZE);
385✔
101
        auto tlen = sql_strftime(this->plf_cached_line.data(),
385✔
102
                                 this->plf_cached_line.size(),
103
                                 ll.get_timeval(),
385✔
104
                                 'T');
105
        this->plf_cached_line.resize(tlen);
385✔
106
        {
107
            char zone_str[16];
108
            exttm tmptm;
385✔
109

110
            tmptm.et_flags |= ETF_ZONE_SET;
385✔
111
            tmptm.et_gmtoff
112
                = lnav::local_time_to_info(
770✔
113
                      date::local_seconds{ll.get_time<std::chrono::seconds>()})
385✔
114
                      .first.offset.count();
385✔
115
            off_t zone_len = 0;
385✔
116
            ftime_z(zone_str, zone_len, sizeof(zone_str), tmptm);
385✔
117
            for (off_t lpc = 0; lpc < zone_len; lpc++) {
2,310✔
118
                this->plf_cached_line.push_back(zone_str[lpc]);
1,925✔
119
            }
120
        }
121
        this->plf_cached_line.push_back(' ');
385✔
122
        const auto prefix_len = this->plf_cached_line.size();
385✔
123
        this->plf_cached_line.resize(this->plf_cached_line.size()
770✔
124
                                     + sbr.length());
385✔
125
        memcpy(
385✔
126
            &this->plf_cached_line[prefix_len], sbr.get_data(), sbr.length());
385✔
127

128
        sbr.share(this->plf_share_manager,
770✔
129
                  this->plf_cached_line.data(),
385✔
130
                  this->plf_cached_line.size());
131
    }
385✔
132

133
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
134
    {
135
        auto retval = std::make_shared<piper_log_format>(*this);
6✔
136

137
        retval->lf_specialized = true;
6✔
138
        retval->lf_timestamp_flags |= ETF_ZONE_SET | ETF_MICROS_SET;
6✔
139
        return retval;
12✔
140
    }
6✔
141

142
private:
143
    shared_buffer plf_share_manager;
144
    std::vector<char> plf_cached_line;
145
};
146

147
class o1_generic_log_format : public log_format {
148
public:
149
    static const pcre_format* get_pcre_log_formats()
14,749✔
150
    {
151
        static const pcre_format log_fmt[] = {
152
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24}))"),
153
            pcre_format(
154
                R"((?x)^
155
  (?:\*\*\*\s+)?                              # optional "*** " prefix
156
  (?<timestamp>
157
      (?:
158
          \s
159
        | \d{4}[\-\/]\d{2}[\-\/]\d{2}         # YYYY-MM-DD or YYYY/MM/DD
160
        | T                                   # ISO date/time separator
161
        | \d{1,2}:\d{2}(?::\d{2}(?:[\.,]\d{1,9})?)?   # HH:MM[:SS[.frac]]
162
        | Z                                   # UTC zulu marker
163
        | [+\-]\d{2}:?\d{2}                   # timezone offset, +0500 or +05:00
164
        | (?!DBG|DEBUG\d?|ERR|INFO|WARN|NONE|CRITICAL|FATAL)    # ...not one of these levels
165
          [A-Z]{3,4}                          # 3-4 uppercase letters (e.g. month/tz abbrev)
166
      )+
167
  )
168
  [:|\s]?                                     # optional separator
169
  (trc|trace|critical|fatal|dbg\d?|debug\d?|info|warn(?:ing)?|err(?:or)?)   # log level
170
  [:|\s]                                      # separator
171
  \s*
172
)"),
173
            pcre_format(
174
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) \[(trace|debug\d?|info|warn(?:ing)?|error|critical|fatal)\]\s+)"),
175
            pcre_format(
176
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) -- (trace|debug\d?|info|warn(?:ing)?|error|critical|fatal) --\s+)"),
177

178
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+/\.-]+) \[\w\s+)"),
179
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+)\s+)"),
180
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+) -\s+)"),
181
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+) -\s+)"),
182
            pcre_format(
183
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\] \[(trace|debug\d?|info|warn(?:ing)?|error|critical|fatal)\]\s+)"),
184
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: "
185
                        "\\.,/-]+)\\[[^\\]]+\\]\\s+"),
186
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+)\s+)"),
187

188
            pcre_format(
189
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s*(\w+):?\s+)"),
190
            pcre_format(
191
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s+)"),
192
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: "
193
                        "\\.,+/-]+)\\] \\w+\\s+"),
194
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: ,+/-]+)\\] "
195
                        "\\(\\d+\\)\\s+"),
196

197
            pcre_format(),
198
        };
14,749✔
199

200
        return log_fmt;
14,749✔
201
    }
202

UNCOV
203
    std::string get_pattern_regex(const pattern_locks& pl,
×
204
                                  uint64_t line_number) const override
205
    {
UNCOV
206
        auto pat_index = pl.pattern_index_for_line(line_number);
×
UNCOV
207
        return get_pcre_log_formats()[pat_index].name;
×
208
    }
209

210
    const intern_string_t get_name() const override
17,717✔
211
    {
212
        static const intern_string_t RETVAL
213
            = intern_string::lookup("generic_log");
19,539✔
214

215
        return RETVAL;
17,717✔
216
    }
217

218
    scan_result_t scan(logfile& lf,
14,644✔
219
                       std::vector<logline>& dst,
220
                       const line_info& li,
221
                       shared_buffer_ref& sbr,
222
                       scan_batch_context& sbc) override
223
    {
224
        exttm log_time;
14,644✔
225
        timeval log_tv;
226
        string_fragment ts;
14,644✔
227
        std::optional<string_fragment> level;
14,644✔
228
        const char* last_pos;
229

230
        if (dst.size() == 1) {
14,644✔
231
            auto file_options = lf.get_file_options();
1,401✔
232

233
            if (file_options) {
1,401✔
234
                this->lf_date_time.dts_default_zone
235
                    = file_options->second.fo_default_zone.pp_value;
60✔
236
            } else {
237
                this->lf_date_time.dts_default_zone = nullptr;
1,341✔
238
            }
239
        }
1,401✔
240

241
        if ((last_pos = this->log_scanf(sbc,
29,288✔
242
                                        dst.size(),
14,644✔
243
                                        sbr.to_string_fragment(),
244
                                        get_pcre_log_formats(),
245
                                        nullptr,
246
                                        &log_time,
247
                                        &log_tv,
248

249
                                        &ts,
250
                                        &level))
251
            != nullptr)
14,644✔
252
        {
253
            auto level_val = log_level_t::LEVEL_UNKNOWN;
3,311✔
254
            if (level) {
3,311✔
255
                level_val = string2level(level->data(), level->length());
3,311✔
256
            }
257

258
            if (!((log_time.et_flags & ETF_DAY_SET)
3,311✔
259
                  && (log_time.et_flags & ETF_MONTH_SET)
3,236✔
260
                  && (log_time.et_flags & ETF_YEAR_SET)))
3,236✔
261
            {
262
                this->check_for_new_year(dst, log_time, log_tv);
990✔
263
            }
264

265
            if (!(this->lf_timestamp_flags
6,622✔
266
                  & (ETF_MILLIS_SET | ETF_MICROS_SET | ETF_NANOS_SET))
3,311✔
267
                && !dst.empty()
2,902✔
268
                && dst.back().get_time<std::chrono::seconds>().count()
2,902✔
269
                    == log_tv.tv_sec
2,902✔
270
                && dst.back()
7,909✔
271
                        .get_subsecond_time<std::chrono::microseconds>()
5,007✔
272
                        .count()
1,696✔
273
                    != 0)
274
            {
275
                auto log_us
UNCOV
276
                    = dst.back()
×
UNCOV
277
                          .get_subsecond_time<std::chrono::microseconds>();
×
278

279
                log_time.et_nsec
UNCOV
280
                    = std::chrono::duration_cast<std::chrono::nanoseconds>(
×
281
                          log_us)
UNCOV
282
                          .count();
×
283
                log_tv.tv_usec
UNCOV
284
                    = std::chrono::duration_cast<std::chrono::microseconds>(
×
285
                          log_us)
UNCOV
286
                          .count();
×
287
            }
288

289
            auto log_us = to_us(log_tv);
3,311✔
290
            auto tid_iter = sbc.sbc_tids.insert_tid(
3,311✔
UNCOV
291
                sbc.sbc_allocator, string_fragment{}, log_us);
×
292
            tid_iter->second.titr_level_stats.update_msg_count(level_val);
3,311✔
293
            auto& ll = dst.back();
3,311✔
294
            ll.set_time(log_us);
3,311✔
295
            ll.set_level(level_val);
3,311✔
296
            return scan_match{5};
3,311✔
297
        }
298

299
        return scan_no_match{"no patterns matched"};
11,333✔
300
    }
301

302
    void annotate(logfile* lf,
105✔
303
                  uint64_t line_number,
304
                  string_attrs_t& sa,
305
                  logline_value_vector& values) const override
306
    {
307
        thread_local auto md = lnav::pcre2pp::match_data::unitialized();
105✔
308
        auto lffs = lf->get_format_file_state();
105✔
309
        auto& line = values.lvv_sbr;
105✔
310
        int pat_index
311
            = lffs.lffs_pattern_locks.pattern_index_for_line(line_number);
105✔
312
        const auto& fmt = get_pcre_log_formats()[pat_index];
105✔
313
        const auto line_sf = line.to_string_fragment();
105✔
314
        auto match_res = fmt.pcre->capture_from(line_sf)
105✔
315
                             .into(md)
105✔
316
                             .matches(PCRE2_NO_UTF_CHECK)
210✔
317
                             .ignore_error();
105✔
318
        if (!match_res) {
105✔
319
            return;
12✔
320
        }
321

322
        int prefix_len = md.remaining().sf_begin;
93✔
323
        auto ts_cap = md[fmt.pf_timestamp_index].value();
93✔
324
        auto lr = to_line_range(ts_cap.trim());
93✔
325
        auto level_cap = md[2];
93✔
326

327
        if (!level_cap) {
93✔
328
            lr.lr_end = prefix_len
20✔
329
                = lr.lr_start + this->lf_date_time.dts_fmt_len;
20✔
330
        }
331
        sa.emplace_back(lr, L_TIMESTAMP.value());
93✔
332

333
        values.lvv_values.emplace_back(TS_META, line, lr);
93✔
334
        values.lvv_values.back().lv_meta.lvm_format = (log_format*) this;
93✔
335

336
        if (level_cap) {
93✔
337
            if (string2level(level_cap->data(), level_cap->length(), true)
73✔
338
                != LEVEL_UNKNOWN)
73✔
339
            {
340
                values.lvv_values.emplace_back(
73✔
341
                    LEVEL_META, line, to_line_range(level_cap->trim()));
73✔
342
                values.lvv_values.back().lv_meta.lvm_format
73✔
343
                    = (log_format*) this;
73✔
344

345
                lr = to_line_range(level_cap->trim());
73✔
346
                if (lr.lr_end != (ssize_t) line.length()) {
73✔
347
                    sa.emplace_back(lr, L_LEVEL.value());
73✔
348
                }
349
            }
350
        }
351

352
        lr.lr_start = 0;
93✔
353
        lr.lr_end = prefix_len;
93✔
354
        sa.emplace_back(lr, L_PREFIX.value());
93✔
355

356
        lr.lr_start = prefix_len;
93✔
357
        lr.lr_end = line.length();
93✔
358
        sa.emplace_back(lr, SA_BODY.value());
93✔
359

360
        log_format::annotate(lf, line_number, sa, values);
93✔
361
    }
362

363
    std::shared_ptr<log_format> specialized(int fmt_lock) override
55✔
364
    {
365
        auto retval = std::make_shared<o1_generic_log_format>(*this);
55✔
366

367
        retval->lf_specialized = true;
55✔
368
        return retval;
110✔
369
    }
55✔
370

371
    bool hide_field(const intern_string_t field_name, bool val) override
35✔
372
    {
373
        if (field_name == TS_META.lvm_name) {
35✔
374
            TS_META.lvm_user_hidden = val;
12✔
375
            return true;
12✔
376
        }
377
        if (field_name == LEVEL_META.lvm_name) {
23✔
378
            LEVEL_META.lvm_user_hidden = val;
12✔
379
            return true;
12✔
380
        }
381
        if (field_name == OPID_META.lvm_name) {
11✔
382
            OPID_META.lvm_user_hidden = val;
11✔
383
            return true;
11✔
384
        }
UNCOV
385
        return false;
×
386
    }
387

388
    std::map<intern_string_t, logline_value_meta> get_field_states() override
97✔
389
    {
390
        return {
391
            {TS_META.lvm_name, TS_META},
392
            {LEVEL_META.lvm_name, LEVEL_META},
393
            {OPID_META.lvm_name, OPID_META},
394
        };
485✔
395
    }
97✔
396

397
private:
398
    static logline_value_meta TS_META;
399
    static logline_value_meta LEVEL_META;
400
    static logline_value_meta OPID_META;
401
};
402

403
// Metadata for the built-in fields exposed by o1_generic_log_format.

// `log_time`: text value bound to table column 2.
logline_value_meta o1_generic_log_format::TS_META = logline_value_meta{
    intern_string::lookup("log_time"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{2},
};

// `log_level`: text value bound to table column 3.
logline_value_meta o1_generic_log_format::LEVEL_META = logline_value_meta{
    intern_string::lookup("log_level"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{3},
};

// `log_opid`: text value declared as an internal column.
logline_value_meta o1_generic_log_format::OPID_META = logline_value_meta{
    intern_string::lookup("log_opid"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::internal_column{},
};
420

421
/**
 * Decode the `\xHH` escapes in a length-delimited string.
 *
 * Only the first `len` bytes of `str` are examined; the input does not need
 * to be NUL-terminated.  An escape is recognized only when the backslash is
 * followed by `x` and exactly two hexadecimal digits (either case), all of
 * which lie inside the buffer.
 *
 * A backslash that does not start a valid escape is dropped and the
 * characters after it are copied through unchanged.
 *
 * @param str The buffer to decode.
 * @param len The number of bytes in `str`.
 * @return The decoded string.
 */
std::string
from_escaped_string(const char* str, size_t len)
{
    // Value of a hexadecimal digit, or -1 if `ch` is not one.  Unlike
    // sscanf("%x"), this never skips whitespace or accepts a sign.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    std::string retval;

    retval.reserve(len);
    for (size_t lpc = 0; lpc < len; lpc++) {
        if (str[lpc] != '\\') {
            retval.push_back(str[lpc]);
            continue;
        }

        // The bounds check guarantees that both digits are within `len`.
        if ((lpc + 3) < len && str[lpc + 1] == 'x') {
            const int hi = hex_value(str[lpc + 2]);
            const int lo = hex_value(str[lpc + 3]);

            if (hi >= 0 && lo >= 0) {
                retval.push_back(static_cast<char>((hi << 4) | lo));
                // Skip the 'x' and both digits; the loop increment moves
                // past the last digit.
                lpc += 3;
            }
        }
    }

    return retval;
}
×
446

447
// -----------------------------------------------------------------
448
// Recognizes CSV files whose first line is a header with a
449
// timestamp-like first column (`timestamp`, `time`, `ts`, or a name
450
// starting with `date`), and whose subsequent rows begin with a
451
// parseable timestamp.  Tolerates a leading UTF-8 BOM, the
452
// Excel-style `sep=<ch>` delimiter hint, CRLF line endings, and
453
// CSV-style `""`-escaped double quotes inside quoted fields.
454
// The header line is emitted as an ignored logline so lnav stays
455
// locked to this format for the rest of the file.
456
//
457
// Each non-timestamp column is exposed as a `VALUE_FLOAT` field so
458
// queries such as `SELECT cpu_pct FROM metrics_log` work per-file.
459
// The cross-file long-format `all_metrics` SQL virtual table
460
// (source/metric/value across all loaded metric files) lives in
461
// `metrics_vtab.cc`.
462
// -----------------------------------------------------------------
463
class metrics_log_format : public log_format {
464
public:
465
    metrics_log_format()
917✔
466
    {
917✔
467
        this->lf_multiline = false;
917✔
468
        this->lf_is_metric = true;
917✔
469
        this->lf_time_ordered = false;
917✔
470
    }
917✔
471

472
    /** @return The interned name of this format, "metrics_log". */
    const intern_string_t get_name() const override
    {
        static const intern_string_t NAME
            = intern_string::lookup("metrics_log");

        return NAME;
    }
479

480
    /**
     * Parse one data row: the first cell is the timestamp and every
     * following cell is folded into the per-column statistics.
     *
     * On success the time of the last entry in `dst` is set from the
     * timestamp cell and the flags learned by the timestamp scanner are
     * merged into `lf_timestamp_flags`.
     *
     * @param line_sf The text of the row.
     * @param dst The index of lines; only the last element is updated.
     * @param sbc The batch context whose `sbc_value_stats` is resized to
     *   the number of field definitions and then updated.
     * @return `scan_match{500}` on success, otherwise a `scan_error` that
     *   describes why the row was rejected.
     */
    scan_result_t parse_line(const string_fragment& line_sf,
                             std::vector<logline>& dst,
                             scan_batch_context& sbc)
    {
        separated_string cells{line_sf};
        cells.with_separator(this->mlf_separator);
        if (!this->mlf_headers.empty()) {
            cells.ss_expected_count = this->mlf_headers.size();
        }

        auto cell_iter = cells.begin();
        if (cell_iter == cells.end()) {
            return scan_error{"empty metric row"};
        }

        // The leading cell must be a timestamp the scanner understands.
        const auto ts_sf = *cell_iter;
        exttm tm;
        timeval tv;
        if (this->lf_date_time.scan(
                ts_sf.data(), ts_sf.length(), nullptr, &tm, tv)
            == nullptr)
        {
            return scan_error{fmt::format(
                FMT_STRING("metric row timestamp did not parse: {}"),
                ts_sf.to_string())};
        }
        dst.back().set_time(to_us(tv));
        // Remember what the scanner found in the timestamp so that later
        // consumers see the same flags.
        this->lf_timestamp_flags |= tm.et_flags;

        // One stats slot per field definition; the loop index below is
        // used for both.
        const auto expected_fields = this->mlf_field_defs.size();
        sbc.sbc_value_stats.resize(expected_fields);

        auto field_index = 0;
        for (++cell_iter; cell_iter != cells.end();
             ++cell_iter, ++field_index)
        {
            if (field_index >= expected_fields) {
                return scan_error{
                    fmt::format(FMT_STRING("metric row has too many fields, "
                                           "expecting only {} fields"),
                                expected_fields)};
            }

            auto& col_stats = sbc.sbc_value_stats[field_index];

            // Keep the widest raw cell seen for this column.
            const auto raw_width
                = static_cast<int64_t>((*cell_iter).length());
            if (col_stats.lvs_width < raw_width) {
                col_stats.lvs_width = raw_width;
            }

            // Only cells that produce a number contribute a value; empty
            // and text cells are ignored.
            parse_cell(cell_iter, parse_context::scan)
                .match(
                    [](empty_cell) {},
                    [&col_stats](int64_t i) {
                        col_stats.add_value(static_cast<double>(i));
                    },
                    [&col_stats](double d) { col_stats.add_value(d); },
                    [&col_stats](humanized_cell hc) {
                        col_stats.add_value(hc.value);
                    },
                    [](const text_cell& tc) {});
        }

        if (field_index < expected_fields) {
            return scan_error{fmt::format(
                FMT_STRING("metric row has too few fields: found {}, "
                           "expected {} fields"),
                field_index,
                expected_fields)};
        }

        // Before the format is locked in, insist that the stats hold at
        // least one counted value.
        if (!this->lf_specialized) {
            auto counted = 0;
            for (const auto& col_stats : sbc.sbc_value_stats) {
                counted += col_stats.lvs_count;
            }
            if (counted == 0) {
                return scan_error{"metric row has no numeric fields"};
            }
        }

        return scan_match{500};
    }
561

562
    /**
     * Scan a data row once the header has been dealt with: tag the last
     * entry of `dst` with LEVEL_STATS and parse the row with parse_line().
     *
     * This reads `dst.back()` unconditionally, so `dst` must not be empty.
     * NOTE(review): an earlier comment here described seeding from epoch
     * when `dst` is empty after a reindex, but there is no such handling in
     * this function -- confirm callers never pass an empty `dst`.
     *
     * @param dst The index of lines; the last element is the current line.
     * @param li Metadata for the current line (not used here).
     * @param sbr The content of the current line.
     * @param sbc The batch context forwarded to parse_line().
     * @return The result of parse_line().
     */
    scan_result_t scan_int(std::vector<logline>& dst,
                           const line_info& li,
                           shared_buffer_ref& sbr,
                           scan_batch_context& sbc)
    {
        const auto row_sf = sbr.to_string_fragment();

        dst.back().set_level(LEVEL_STATS);
        return this->parse_line(row_sf, dst, sbc);
    }
579

580
    scan_result_t scan(logfile& lf,
14,681✔
581
                       std::vector<logline>& dst,
582
                       const line_info& li,
583
                       shared_buffer_ref& sbr,
584
                       scan_batch_context& sbc) override
585
    {
586
        if (li.li_partial) {
14,681✔
587
            return scan_incomplete{};
24✔
588
        }
589

590
        // Keep the scanner's default zone in sync with the file's
591
        // current options on every scan.  `:set-file-timezone`
592
        // mutates the options after the format has already specialized,
593
        // so a once-at-detection sync leaves stale state and every
594
        // subsequent timestamp parses against the wrong zone.
595
        {
596
            auto file_options = lf.get_file_options();
14,657✔
597
            this->lf_date_time.dts_default_zone = file_options
14,657✔
598
                ? file_options->second.fo_default_zone.pp_value
14,657✔
599
                : nullptr;
600
        }
14,657✔
601

602
        if (this->lf_specialized) {
14,657✔
603
            if (dst.size() == 1) {
350✔
604
                // Reindex (e.g. after `:set-file-timezone`) clears
605
                // `lf_index` and starts scanning from byte zero again.
606
                // The format is still locked in from the prior pass,
607
                // so just reproduce the header's ignored-logline so
608
                // the data rows that follow land in `scan_int` with
609
                // a valid `dst.back()`.
610
                auto& ll = dst.back();
1✔
611
                ll.set_level(LEVEL_UNKNOWN);
1✔
612
                ll.set_ignore(true);
1✔
613
                return scan_match{500};
1✔
614
            }
615
            // we've locked on, don't need to figure out the header
616
            return scan_int(dst, li, sbr, sbc);
349✔
617
        }
618

619
        if (dst.size() < 2) {
14,307✔
620
            return scan_no_match{"waiting for header and data row"};
1,434✔
621
        }
622

623
        if (dst.size() > 3) {
12,873✔
624
            return scan_no_match{
11,529✔
625
                "line is after CSV headers and first data row"};
11,529✔
626
        }
627

628
        // First part of the file — reset any per-file state left
629
        // over from a prior file on this shared base instance.
630
        this->mlf_headers.clear();
1,344✔
631
        this->mlf_field_defs.clear();
1,344✔
632
        this->mlf_separator = ',';
1,344✔
633
        auto has_sep_directive = false;
1,344✔
634
        for (auto ll_iter = dst.begin(); ll_iter != dst.end(); ++ll_iter) {
1,481✔
635
            auto read_res = lf.read_line(ll_iter);
1,415✔
636
            if (read_res.isErr()) {
1,415✔
UNCOV
637
                return scan_no_match{"cannot read header"};
×
638
            }
639

640
            auto hdr_sbr = read_res.unwrap();
1,415✔
641
            auto hdr_sf = hdr_sbr.to_string_fragment();
1,415✔
642
            // Excel-flavor CSVs sometimes start with `sep=<ch>` to
643
            // hint the delimiter.  Consume that as metadata and wait
644
            // for the real header on the next line.
645
            if (ll_iter == dst.begin() && hdr_sf.startswith("sep=")) {
1,415✔
646
                if (dst.size() == 1) {
2✔
UNCOV
647
                    return scan_no_match{"waiting for more data"};
×
648
                }
649

650
                const auto sep_sf = hdr_sf.substr(4);
2✔
651
                if (sep_sf.empty()) {
2✔
652
                    return scan_error{"sep= hint missing separator character"};
×
653
                }
654
                this->mlf_separator = sep_sf.data()[0];
2✔
655
                ll_iter->set_time(std::chrono::microseconds::zero());
2✔
656
                ll_iter->set_level(LEVEL_UNKNOWN);
2✔
657
                ll_iter->set_ignore(true);
2✔
658
                has_sep_directive = true;
2✔
659
                log_info("metrics_log found 'sep=' header: %x",
2✔
660
                         this->mlf_separator);
661
            } else if (this->mlf_headers.empty()) {
1,413✔
662
                // Header row: require a shape like
663
                // `timestamp,<name>,<name>...`.  This is a conservative
664
                // detector — files without a leading timestamp-named
665
                // column are left to other formats.
666
                separated_string ss{hdr_sf};
1,344✔
667
                if (!has_sep_directive) {
1,344✔
668
                    auto detect_res
669
                        = separated_string::detect_separator(hdr_sf);
1,342✔
670
                    if (detect_res) {
1,342✔
671
                        this->mlf_separator = detect_res.value();
333✔
672
                        log_info("metrics_log detected separator: %x",
333✔
673
                                 this->mlf_separator);
674
                    }
675
                }
676
                ss.with_separator(this->mlf_separator);
1,344✔
677
                std::vector<intern_string_t> fields;
1,344✔
678
                for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
3,704✔
679
                    // Header cells may be CSV-quoted (e.g. Grafana
680
                    // exports wrap PromQL expressions that contain
681
                    // commas or doubled quotes).  Collapse `""` back
682
                    // to `"` so the interned column name matches what
683
                    // the user wrote.
684
                    fields.emplace_back(intern_string::lookup(
2,360✔
685
                        separated_string::unescape_quoted(*iter)));
4,720✔
686
                    log_info("  metrics header: %s", fields.back().c_str());
2,360✔
687
                }
688
                if (fields.size() < 2) {
1,344✔
689
                    return scan_no_match{"too few columns for a metric CSV"};
864✔
690
                }
691
                const auto first = fields[0].to_string_fragment();
480✔
692
                const bool is_time_header = first.iequal("timestamp"_frag)
480✔
693
                    || first.iequal("time"_frag) || first.iequal("ts"_frag)
418✔
694
                    || (first.length() >= 4
1,203✔
695
                        && strncasecmp(first.data(), "date", 4) == 0);
305✔
696
                if (!is_time_header) {
480✔
697
                    return scan_error{fmt::format(
411✔
698
                        FMT_STRING(
1,233✔
699
                            "first column '{}' is not a timestamp header "
700
                            "(expected 'timestamp', 'time', 'ts', or a "
701
                            "'date'-prefixed name)"),
702
                        first.to_string())};
1,233✔
703
                }
704

705
                this->mlf_headers = std::move(fields);
69✔
706
                log_info("metrics_log found %zu header columns",
69✔
707
                         this->mlf_headers.size());
708
                this->build_field_defs();
69✔
709
                ll_iter->set_time(std::chrono::microseconds::zero());
69✔
710
                ll_iter->set_level(LEVEL_UNKNOWN);
69✔
711
                ll_iter->set_ignore(true);
69✔
712
            } else {
1,344✔
713
                auto scan_res = this->parse_line(hdr_sf, dst, sbc);
69✔
714
                if (!scan_res.is<scan_match>()) {
69✔
715
                    log_warning("first data row did not match");
3✔
716
                    return scan_res;
3✔
717
                }
718
                ll_iter->set_level(LEVEL_STATS);
66✔
719
            }
69✔
720
        }
2,693✔
721
        return this->scan_int(dst, li, sbr, sbc);
66✔
722
    }
723

724
    std::optional<size_t> stats_index_for_value(
1,143✔
725
        const intern_string_t& name) const override
726
    {
727
        for (size_t i = 0; i < this->mlf_field_defs.size(); ++i) {
2,191✔
728
            if (this->mlf_field_defs[i].lvm_name == name) {
2,152✔
729
                return i;
1,104✔
730
            }
731
        }
732
        return std::nullopt;
39✔
733
    }
734

735
    std::vector<logline_value_meta> get_value_metadata() const override
86✔
736
    {
737
        return this->mlf_field_defs;
86✔
738
    }
739

740
    size_t get_value_metadata_count() const override
431✔
741
    {
742
        return this->mlf_field_defs.size();
431✔
743
    }
744

745
    void annotate(logfile* lf,
976✔
746
                  uint64_t line_number,
747
                  string_attrs_t& sa,
748
                  logline_value_vector& values) const override
749
    {
750
        auto& sbr = values.lvv_sbr;
976✔
751
        const auto line_sf = sbr.to_string_fragment().trim("\r\n");
976✔
752

753
        separated_string ss{line_sf};
976✔
754
        ss.with_separator(this->mlf_separator);
976✔
755
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
7,678✔
756
            const auto field = *iter;
3,355✔
757
            const auto lr = line_range{field.sf_begin, field.sf_end};
3,355✔
758

759
            if (iter.index() == 0) {
3,355✔
760
                sa.emplace_back(lr, L_TIMESTAMP.value());
976✔
761
                continue;
976✔
762
            }
763
            // The header row is emitted as an ignored logline, so
764
            // `mlf_field_defs` (which excludes col 0) has one entry
765
            // per data column.  Extra trailing columns are dropped.
766
            const auto field_index = iter.index() - 1;
2,379✔
767
            if (field_index >= this->mlf_field_defs.size()) {
2,379✔
768
                break;
4✔
769
            }
770
            // Parse once rather than paying the re-parse cost each
771
            // time SQL reads the cell.  The variant preserves int vs
772
            // float so the renderer can format integers without a
773
            // trailing decimal point.  The static `mlf_hidden_columns`
774
            // registry is overlaid so hide state propagates across
775
            // specialized instances that share column names.
776
            auto meta = this->mlf_field_defs[field_index];
2,375✔
777
            if (mlf_hidden_columns.count(meta.lvm_name) != 0) {
2,375✔
778
                meta.lvm_user_hidden = true;
99✔
779
            }
780
            parse_cell(iter, parse_context::annotate)
2,375✔
781
                .match(
2,375✔
UNCOV
782
                    [&](empty_cell) { values.lvv_values.emplace_back(meta); },
×
783
                    [&](int64_t i) { values.lvv_values.emplace_back(meta, i); },
1,574✔
784
                    [&](double d) { values.lvv_values.emplace_back(meta, d); },
615✔
UNCOV
785
                    [&](humanized_cell hc) {
×
786
                        // Carry the detected unit on the per-value meta so
787
                        // downstream renderers can call humanize::format
788
                        // against the base-unit value.
789
                        auto cell_meta = meta;
124✔
790
                        cell_meta.lvm_unit_suffix = hc.unit_suffix;
124✔
791
                        values.lvv_values.emplace_back(cell_meta, hc.value);
124✔
792
                    },
124✔
793
                    [&](const text_cell& tc) {
2,375✔
794
                        values.lvv_values.emplace_back(meta, tc.value);
62✔
795
                        values.lvv_values.back().lv_meta.lvm_kind
62✔
796
                            = value_kind_t::VALUE_TEXT;
62✔
797
                    });
62✔
798
            values.lvv_values.back().lv_origin = lr;
2,375✔
799
        }
2,375✔
800

801
        log_format::annotate(lf, line_number, sa, values);
976✔
802
    }
976✔
803

804
    std::shared_ptr<log_format> specialized(int fmt_lock) override
63✔
805
    {
806
        auto retval = std::make_shared<metrics_log_format>(*this);
63✔
807

808
        retval->lf_specialized = true;
63✔
809
        return retval;
126✔
810
    }
63✔
811

812
private:
813
    // A parsed metric cell: either an int64, a double, or nothing
814
    // (empty or unparseable).  Keeping the original integer type
815
    // lets the renderer format int cells without a decimal point,
816
    // while callers that want a single numeric type can coerce via
817
    // the `match` below.
818
    struct empty_cell {};
819
    // Humanized cell: the raw text had a recognized unit suffix
820
    // ("1.5KB", "20ms", "2.5GHz").  The value is already normalized
821
    // to the base unit (bytes, seconds, Hz) and `unit_suffix` carries
822
    // the canonical suffix so downstream renderers can format it back
823
    // to human-friendly form.
824
    struct humanized_cell {
825
        double value;
826
        intern_string_t unit_suffix;
827
    };
828
    struct text_cell {
829
        std::string value;
830
    };
831
    using parsed_cell_t = mapbox::util::
832
        variant<empty_cell, int64_t, double, humanized_cell, text_cell>;
833

834
    enum class parse_context {
835
        scan,
836
        annotate,
837
    };
838

839
    /**
     * Convert the cell an iterator points at into a typed value.
     *
     * The iterator's own classification (`iter.kind()`) selects the parser.
     * A numeric or suffixed cell whose parse fails yields `empty_cell`.
     *
     * @param iter Iterator positioned on the cell to parse.
     * @param pc Who is asking.  For non-numeric text, `annotate` yields a
     *   `text_cell` holding the unescaped text, while `scan` yields
     *   `empty_cell` so the text does not disturb statistics or the
     *   column's type.
     * @return The parsed cell.
     */
    static parsed_cell_t parse_cell(const separated_string::iterator& iter,
                                    parse_context pc)
    {
        using cell_kind = separated_string::cell_kind;

        const auto nothing = [] { return parsed_cell_t{empty_cell{}}; };
        const auto cell = *iter;

        switch (iter.kind()) {
            case cell_kind::empty:
                return nothing();

            case cell_kind::integer: {
                auto int_res = scn::scan_value<int64_t>(cell.to_string_view());
                if (int_res) {
                    return parsed_cell_t{int_res->value()};
                }
                return nothing();
            }

            case cell_kind::floating: {
                auto float_res
                    = scn::scan_value<double>(cell.to_string_view());
                if (float_res) {
                    return parsed_cell_t{float_res->value()};
                }
                return nothing();
            }

            case cell_kind::number_with_suffix: {
                // The classifier has already matched `<num><unit>`; the
                // conversion itself can still fail.
                auto unit_res = humanize::try_from<double>(cell);
                if (unit_res) {
                    return parsed_cell_t{humanized_cell{
                        unit_res->value,
                        intern_string::lookup(unit_res->unit_suffix),
                    }};
                }
                return nothing();
            }

            case cell_kind::other:
                // Plain text.  Only annotation keeps it, so it can be
                // displayed and queried.
                if (pc == parse_context::annotate) {
                    return parsed_cell_t{
                        text_cell{separated_string::unescape_quoted(cell)}};
                }
                return nothing();
        }
        return nothing();
    }
890

891
    void build_field_defs()
69✔
892
    {
893
        this->mlf_field_defs.clear();
69✔
894
        // Columns 1..N (timestamp is column 0) become VALUE_FLOAT
895
        // fields.  Column names are kept verbatim from the header;
896
        // the CREATE TABLE generator applies SQL quoting for names
897
        // that need it.  Pass `this` as the owning format so the
898
        // field_overlay_source treats these as real table fields
899
        // (show/hide, chart, etc.) rather than skipping them.
900
        for (size_t h = 1; h < this->mlf_headers.size(); ++h) {
243✔
901
            this->mlf_field_defs.emplace_back(
174✔
902
                this->mlf_headers[h],
174✔
UNCOV
903
                value_kind_t::VALUE_FLOAT,
×
UNCOV
904
                logline_value_meta::table_column{h - 1},
×
905
                this);
174✔
906
            if (mlf_hidden_columns.count(this->mlf_headers[h]) != 0) {
174✔
UNCOV
907
                this->mlf_field_defs.back().lvm_user_hidden = true;
×
908
            }
909
        }
910
    }
69✔
911

912
public:
913
    // Hide state lives in a static set instead of on the meta so it
914
    // survives file re-detection (which rebuilds `mlf_field_defs` from
915
    // scratch) and propagates across every specialized instance that
916
    // shares the column name.  Only the currently-hidden columns are
917
    // tracked — showing a column erases its entry rather than storing
918
    // `false`, so the set stays bounded across hide/show cycles.
919
    bool hide_field(const intern_string_t field_name, bool val) override
18✔
920
    {
921
        if (val) {
18✔
922
            mlf_hidden_columns.insert(field_name);
7✔
923
        } else {
924
            mlf_hidden_columns.erase(field_name);
11✔
925
        }
926
        for (auto& meta : this->mlf_field_defs) {
66✔
927
            if (meta.lvm_name == field_name) {
48✔
928
                if (val) {
17✔
929
                    meta.lvm_user_hidden = true;
6✔
930
                } else {
931
                    meta.lvm_user_hidden.reset();
11✔
932
                }
933
            }
934
        }
935
        return true;
18✔
936
    }
937

938
    std::map<intern_string_t, logline_value_meta> get_field_states() override
191✔
939
    {
940
        std::map<intern_string_t, logline_value_meta> retval;
191✔
941
        for (const auto& meta : this->mlf_field_defs) {
416✔
942
            retval.emplace(meta.lvm_name, meta);
225✔
943
        }
944
        // Include columns that were hidden before this instance saw
945
        // its header, so session save still captures them.
946
        for (const auto& name : mlf_hidden_columns) {
194✔
947
            if (retval.count(name) != 0) {
3✔
948
                continue;
3✔
949
            }
UNCOV
950
            logline_value_meta meta{name, value_kind_t::VALUE_FLOAT};
×
UNCOV
951
            meta.lvm_user_hidden = true;
×
UNCOV
952
            retval.emplace(name, std::move(meta));
×
953
        }
954
        return retval;
191✔
UNCOV
955
    }
×
956

957
    std::vector<intern_string_t> mlf_headers;
958
    std::vector<logline_value_meta> mlf_field_defs;
959
    // Column separator; overridden by an Excel-style `sep=<ch>` hint
960
    // on the first line of the file.
961
    char mlf_separator{','};
962

963
    // User-hidden metric column names.  Shared across every
964
    // `metrics_log_format` instance so hides set via
965
    // `:hide-fields metrics_log.<col>` affect every open metric file
966
    // that has the column, and survive file re-detection (which
967
    // rebuilds `mlf_field_defs`).  Only currently-hidden columns are
968
    // stored; `hide_field(name, false)` erases so the set stays
969
    // bounded across hide/show cycles.
970
    static std::set<intern_string_t> mlf_hidden_columns;
971
};
972

973
std::set<intern_string_t> metrics_log_format::mlf_hidden_columns;
974

975
class bro_log_format : public log_format {
976
public:
977
    static const intern_string_t TS;
978
    static const intern_string_t DURATION;
979
    struct field_def {
980
        logline_value_meta fd_meta;
981
        logline_value_meta* fd_root_meta;
982
        std::string fd_collator;
983
        std::optional<size_t> fd_numeric_index;
984

985
        explicit field_def(const intern_string_t name,
709✔
986
                           size_t col,
987
                           log_format* format)
988
            : fd_meta(name,
1,418✔
989
                      value_kind_t::VALUE_TEXT,
990
                      logline_value_meta::table_column{col},
709✔
991
                      format),
992
              fd_root_meta(&FIELD_META.find(name)->second)
709✔
993
        {
994
        }
709✔
995

996
        field_def& with_kind(value_kind_t kind,
521✔
997
                             bool identifier = false,
998
                             bool foreign_key = false,
999
                             const std::string& collator = "")
1000
        {
1001
            this->fd_meta.lvm_kind = kind;
521✔
1002
            this->fd_meta.lvm_identifier = identifier;
521✔
1003
            this->fd_meta.lvm_foreign_key = foreign_key;
521✔
1004
            this->fd_collator = collator;
521✔
1005
            return *this;
521✔
1006
        }
1007

1008
        field_def& with_numeric_index(size_t index)
131✔
1009
        {
1010
            this->fd_numeric_index = index;
131✔
1011
            return *this;
131✔
1012
        }
1013
    };
1014

1015
    static std::unordered_map<const intern_string_t, logline_value_meta>
1016
        FIELD_META;
1017

1018
    /**
     * @return The interned string "std", which the constructor uses as the
     *   key of this format's opid description.
     */
    static const intern_string_t get_opid_desc()
    {
        static const intern_string_t STD_DESC(intern_string::lookup("std"));

        return STD_DESC;
    }
1024

1025
    bro_log_format()
917✔
1026
    {
917✔
1027
        this->lf_multiline = false;
917✔
1028
        this->lf_structured = true;
917✔
1029
        this->lf_is_self_describing = true;
917✔
1030
        this->lf_time_ordered = false;
917✔
1031
        this->lf_timestamp_point_of_reference
1032
            = timestamp_point_of_reference_t::start;
917✔
1033

1034
        auto desc_v = std::make_shared<std::vector<opid_descriptor>>();
917✔
1035
        desc_v->emplace({});
917✔
1036
        auto emplace_res = this->lf_opid_description_def->emplace(
1,834✔
1037
            get_opid_desc(), opid_descriptors{{}, desc_v, 0});
1,834✔
1038
        this->lf_opid_description_def_vec->emplace_back(
917✔
1039
            &emplace_res.first->second);
917✔
1040
    }
917✔
1041

1042
    /**
     * @return The name built from the file's `#path` header once one has
     *   been read, otherwise the generic name "bro".
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t DEFAULT_NAME(
            intern_string::lookup("bro"));

        if (this->blf_format_name.empty()) {
            return DEFAULT_NAME;
        }
        return this->blf_format_name;
    }
1048

1049
    void clear() override
14,706✔
1050
    {
1051
        this->log_format::clear();
14,706✔
1052
        this->blf_format_name.clear();
14,706✔
1053
        this->blf_field_defs.clear();
14,706✔
1054
    }
14,706✔
1055

1056
    std::vector<logline_value_meta> get_value_metadata() const override
1✔
1057
    {
1058
        std::vector<logline_value_meta> retval;
1✔
1059

1060
        for (const auto& fd : this->blf_field_defs) {
30✔
1061
            retval.emplace_back(fd.fd_meta);
29✔
1062
        }
1063
        return retval;
1✔
UNCOV
1064
    }
×
1065

1066
    /**
     * Scan one data line of a bro log whose header has already been
     * parsed into `blf_field_defs`.
     *
     * Each cell is matched to its column definition by position.  The
     * timestamp, status code, UID, originating host and duration columns
     * feed the log line's time, level and opid tracking; numeric columns
     * feed `sbc.sbc_value_stats`.
     *
     * @param dst The lines scanned so far; the last entry is the line
     *   being scanned.
     * @param li Not used by this implementation.
     * @param sbr The text of the line.
     * @param sbc Batch state that receives value statistics and opids.
     * @return scan_match{2000} when the line is a `#close` footer or when
     *   exactly one timestamp cell was parsed; scan_no_match otherwise.
     */
    scan_result_t scan_int(std::vector<logline>& dst,
                           const line_info& li,
                           shared_buffer_ref& sbr,
                           scan_batch_context& sbc)
    {
        static const intern_string_t STATUS_CODE
            = intern_string::lookup("bro_status_code");
        static const intern_string_t UID = intern_string::lookup("bro_uid");
        static const intern_string_t ID_ORIG_H
            = intern_string::lookup("bro_id_orig_h");

        separated_string ss(sbr.to_string_fragment());
        // Only read after lf_date_time.scan() has filled it in.
        timeval tv{};
        exttm tm;
        size_t found_ts = 0;
        log_level_t level = LEVEL_INFO;
        uint64_t opid_bloom = 0;
        auto opid_cap = string_fragment::invalid();
        auto host_cap = string_fragment::invalid();
        auto duration = std::chrono::microseconds{0};

        sbc.sbc_value_stats.resize(this->blf_field_defs.size());
        ss.with_separator(this->blf_separator.get()[0]);

        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
            // The `#close` footer is kept as a line but hidden.
            if (iter.index() == 0 && *iter == "#close"_frag) {
                dst.back().set_ignore(true);
                return scan_match{2000};
            }

            // Cells beyond the declared columns are ignored.
            if (iter.index() >= this->blf_field_defs.size()) {
                break;
            }

            const auto& fd = this->blf_field_defs[iter.index()];

            if (TS == fd.fd_meta.lvm_name) {
                static const char* const TIME_FMT[] = {"%s.%f"};
                const auto sf = *iter;

                if (this->lf_date_time.scan(
                        sf.data(), sf.length(), TIME_FMT, &tm, tv))
                {
                    this->lf_timestamp_flags = tm.et_flags;
                    found_ts += 1;
                }
            } else if (STATUS_CODE == fd.fd_meta.lvm_name) {
                const auto sf = *iter;

                // A status whose first character is '4' or greater is
                // reported as an error.
                if (!sf.empty() && sf[0] >= '4') {
                    level = LEVEL_ERROR;
                }
            } else if (UID == fd.fd_meta.lvm_name) {
                opid_cap = *iter;

                opid_bloom = opid_cap.bloom_bits();
            } else if (ID_ORIG_H == fd.fd_meta.lvm_name) {
                host_cap = *iter;
            } else if (DURATION == fd.fd_meta.lvm_name) {
                const auto sf = *iter;
                // Parse the cell text itself.  The previous call passed
                // "{}" as the input and the cell as the format, so it
                // never succeeded and the duration stayed at zero.
                auto scan_res = scn::scan_value<double>(sf.to_string_view());
                if (scan_res) {
                    // The parsed value is scaled by 1,000,000 to get
                    // microseconds.
                    duration = std::chrono::microseconds{
                        static_cast<long long>(scan_res->value() * 1000000)};
                }
            }

            if (fd.fd_numeric_index) {
                switch (fd.fd_meta.lvm_kind) {
                    case value_kind_t::VALUE_INTEGER:
                    case value_kind_t::VALUE_FLOAT: {
                        const auto sv = (*iter).to_string_view();
                        auto scan_float_res = scn::scan_value<double>(sv);
                        if (scan_float_res) {
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
                                .add_value(scan_float_res->value());
                        }
                        break;
                    }
                    default:
                        break;
                }
            }
        }

        if (found_ts == 1) {
            auto log_us = to_us(tv);
            if (!this->lf_specialized) {
                // Until the format is specialized, give every line
                // collected so far this timestamp and mark it ignored.
                // The current line is un-ignored further down.
                for (auto& ll : dst) {
                    ll.set_time(log_us);
                    ll.set_ignore(true);
                }
            }

            if (opid_cap.is_valid()) {
                auto opid_iter = sbc.sbc_opids.insert_op(
                    sbc.sbc_allocator,
                    opid_cap,
                    log_us,
                    this->lf_timestamp_point_of_reference,
                    duration);
                opid_iter->second.otr_level_stats.update_msg_count(level);

                auto& otr = opid_iter->second;
                // Describe the operation by the originating host, but only
                // if it has no description yet.
                if (!otr.otr_description.lod_index && host_cap.is_valid()
                    && otr.otr_description.lod_elements.empty())
                {
                    otr.otr_description.lod_index = 0;
                    otr.otr_description.lod_elements.insert(
                        0, host_cap.to_string());
                }
            }

            auto& ll = dst.back();
            ll.set_time(log_us);
            ll.set_level(level);
            ll.set_ignore(false);
            ll.merge_bloom_bits(opid_bloom);
            return scan_match{2000};
        }
        return scan_no_match{"no header found"};
    }
1188

1189
    scan_result_t scan(logfile& lf,
14,682✔
1190
                       std::vector<logline>& dst,
1191
                       const line_info& li,
1192
                       shared_buffer_ref& sbr,
1193
                       scan_batch_context& sbc) override
1194
    {
1195
        static const auto SEP_RE
1196
            = lnav::pcre2pp::code::from_const(R"(^#separator\s+(.+))");
14,682✔
1197

1198
        if (dst.size() == 1) {
14,682✔
1199
            auto file_options = lf.get_file_options();
1,439✔
1200

1201
            if (file_options) {
1,439✔
1202
                this->lf_date_time.dts_default_zone
1203
                    = file_options->second.fo_default_zone.pp_value;
60✔
1204
            } else {
1205
                this->lf_date_time.dts_default_zone = nullptr;
1,379✔
1206
            }
1207
        }
1,439✔
1208

1209
        if (!this->blf_format_name.empty()) {
14,682✔
1210
            return this->scan_int(dst, li, sbr, sbc);
4,938✔
1211
        }
1212

1213
        if (dst.size() <= 2 || dst.size() > 20 || sbr.empty()
17,268✔
1214
            || sbr.get_data()[0] == '#')
17,268✔
1215
        {
1216
            return scan_no_match{"no header found"};
6,809✔
1217
        }
1218

1219
        auto line_iter = dst.begin();
2,935✔
1220
        auto read_result = lf.read_line(line_iter);
2,935✔
1221

1222
        if (read_result.isErr()) {
2,935✔
UNCOV
1223
            return scan_no_match{"unable to read first line"};
×
1224
        }
1225

1226
        auto line = read_result.unwrap();
2,935✔
1227
        auto md = SEP_RE.create_match_data();
2,935✔
1228

1229
        auto match_res = SEP_RE.capture_from(line.to_string_fragment())
2,935✔
1230
                             .into(md)
2,935✔
1231
                             .matches(PCRE2_NO_UTF_CHECK)
5,870✔
1232
                             .ignore_error();
2,935✔
1233
        if (!match_res) {
2,935✔
1234
            return scan_no_match{"cannot read separator header"};
2,910✔
1235
        }
1236

1237
        this->clear();
25✔
1238

1239
        auto sep = from_escaped_string(md[1]->data(), md[1]->length());
25✔
1240
        this->blf_separator = intern_string::lookup(sep);
25✔
1241

1242
        for (++line_iter; line_iter != dst.end(); ++line_iter) {
225✔
1243
            auto next_read_result = lf.read_line(line_iter);
200✔
1244

1245
            if (next_read_result.isErr()) {
200✔
UNCOV
1246
                return scan_no_match{"unable to read header line"};
×
1247
            }
1248

1249
            line = next_read_result.unwrap();
200✔
1250
            separated_string ss(line.to_string_fragment());
200✔
1251

1252
            ss.with_separator(this->blf_separator.get()[0]);
200✔
1253
            auto iter = ss.begin();
200✔
1254

1255
            string_fragment directive = *iter;
200✔
1256

1257
            if (directive.empty() || directive[0] != '#') {
200✔
1258
                continue;
25✔
1259
            }
1260

1261
            ++iter;
175✔
1262
            if (iter == ss.end()) {
175✔
UNCOV
1263
                continue;
×
1264
            }
1265

1266
            if (directive == "#set_separator") {
175✔
1267
                this->blf_set_separator = intern_string::lookup(*iter);
25✔
1268
            } else if (directive == "#empty_field") {
150✔
1269
                this->blf_empty_field = intern_string::lookup(*iter);
25✔
1270
            } else if (directive == "#unset_field") {
125✔
1271
                this->blf_unset_field = intern_string::lookup(*iter);
25✔
1272
            } else if (directive == "#path") {
100✔
1273
                auto full_name = fmt::format(FMT_STRING("bro_{}_log"), *iter);
75✔
1274
                this->blf_format_name = intern_string::lookup(full_name);
25✔
1275
            } else if (directive == "#fields" && this->blf_field_defs.empty()) {
100✔
1276
                do {
1277
                    auto field_name
1278
                        = intern_string::lookup("bro_" + sql_safe_ident(*iter));
709✔
1279
                    auto common_iter = FIELD_META.find(field_name);
709✔
1280
                    if (common_iter == FIELD_META.end()) {
709✔
1281
                        FIELD_META.emplace(field_name,
703✔
1282
                                           logline_value_meta{
1,406✔
1283
                                               field_name,
1284
                                               value_kind_t::VALUE_TEXT,
1285
                                           });
1286
                    }
1287
                    this->blf_field_defs.emplace_back(
1,418✔
1288
                        field_name, this->blf_field_defs.size(), this);
709✔
1289
                    ++iter;
709✔
1290
                } while (iter != ss.end());
709✔
1291
            } else if (directive == "#types") {
50✔
1292
                static const char* KNOWN_IDS[] = {
1293
                    "bro_conn_uids",
1294
                    "bro_fuid",
1295
                    "bro_host",
1296
                    "bro_info_code",
1297
                    "bro_method",
1298
                    "bro_mime_type",
1299
                    "bro_orig_fuids",
1300
                    "bro_parent_fuid",
1301
                    "bro_proto",
1302
                    "bro_referrer",
1303
                    "bro_resp_fuids",
1304
                    "bro_service",
1305
                    "bro_uid",
1306
                    "bro_uri",
1307
                    "bro_user_agent",
1308
                    "bro_username",
1309
                };
1310
                static const char* KNOWN_FOREIGN[] = {
1311
                    "bro_status_code",
1312
                };
1313

1314
                int numeric_count = 0;
25✔
1315

1316
                do {
1317
                    string_fragment field_type = *iter;
709✔
1318
                    auto& fd = this->blf_field_defs[iter.index() - 1];
709✔
1319

1320
                    if (field_type == "time") {
709✔
1321
                        fd.with_kind(value_kind_t::VALUE_TIMESTAMP);
50✔
1322
                    } else if (field_type == "string") {
684✔
1323
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
522✔
1324
                                                        std::end(KNOWN_IDS),
1325
                                                        fd.fd_meta.lvm_name);
261✔
1326
                        fd.with_kind(value_kind_t::VALUE_TEXT, ident);
522✔
1327
                    } else if (field_type == "count") {
423✔
1328
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
258✔
1329
                                                        std::end(KNOWN_IDS),
1330
                                                        fd.fd_meta.lvm_name);
129✔
1331
                        bool foreign
1332
                            = std::binary_search(std::begin(KNOWN_FOREIGN),
258✔
1333
                                                 std::end(KNOWN_FOREIGN),
1334
                                                 fd.fd_meta.lvm_name);
129✔
1335
                        fd.with_kind(
258✔
1336
                              value_kind_t::VALUE_INTEGER, ident, foreign)
1337
                            .with_numeric_index(numeric_count);
129✔
1338
                        numeric_count += 1;
129✔
1339
                    } else if (field_type == "bool") {
294✔
1340
                        fd.with_kind(value_kind_t::VALUE_BOOLEAN);
8✔
1341
                    } else if (field_type == "addr") {
290✔
1342
                        fd.with_kind(
100✔
1343
                            value_kind_t::VALUE_TEXT, true, false, "ipaddress");
1344
                    } else if (field_type == "port") {
240✔
1345
                        fd.with_kind(value_kind_t::VALUE_INTEGER, true);
100✔
1346
                    } else if (field_type == "interval") {
190✔
1347
                        fd.with_kind(value_kind_t::VALUE_FLOAT)
4✔
1348
                            .with_numeric_index(numeric_count);
2✔
1349
                        numeric_count += 1;
2✔
1350
                    }
1351

1352
                    ++iter;
709✔
1353
                } while (iter != ss.end());
709✔
1354
            }
1355
        }
200✔
1356

1357
        if (!this->blf_format_name.empty() && !this->blf_separator.empty()
50✔
1358
            && !this->blf_field_defs.empty())
50✔
1359
        {
1360
            return this->scan_int(dst, li, sbr, sbc);
25✔
1361
        }
1362

UNCOV
1363
        this->blf_format_name.clear();
×
1364

UNCOV
1365
        return scan_no_match{"no header found"};
×
1366
    }
2,935✔
1367

1368
    void annotate(logfile* lf,
30,358✔
1369
                  uint64_t line_number,
1370
                  string_attrs_t& sa,
1371
                  logline_value_vector& values) const override
1372
    {
1373
        static const intern_string_t UID = intern_string::lookup("bro_uid");
30,398✔
1374

1375
        auto& sbr = values.lvv_sbr;
30,358✔
1376
        separated_string ss(sbr.to_string_fragment());
30,358✔
1377

1378
        ss.with_separator(this->blf_separator.get()[0]);
30,358✔
1379

1380
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
681,188✔
1381
            if (iter.index() >= this->blf_field_defs.size()) {
650,830✔
UNCOV
1382
                return;
×
1383
            }
1384

1385
            const field_def& fd = this->blf_field_defs[iter.index()];
650,830✔
1386
            string_fragment sf = *iter;
650,830✔
1387

1388
            if (sf == this->blf_empty_field) {
650,830✔
1389
                sf.clear();
30,358✔
1390
            } else if (sf == this->blf_unset_field) {
620,472✔
1391
                sf.invalidate();
74,315✔
1392
            }
1393

1394
            auto lr = line_range(sf.sf_begin, sf.sf_end);
650,830✔
1395

1396
            if (fd.fd_meta.lvm_name == TS) {
650,830✔
1397
                sa.emplace_back(lr, L_TIMESTAMP.value());
30,358✔
1398
            } else if (fd.fd_meta.lvm_name == UID) {
620,472✔
1399
                sa.emplace_back(lr, L_OPID.value());
30,358✔
1400
                values.lvv_opid_value = sf.to_string();
30,358✔
1401
                values.lvv_opid_provenance
1402
                    = logline_value_vector::opid_provenance::file;
30,358✔
1403
            }
1404

1405
            if (lr.is_valid()) {
650,830✔
1406
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
576,515✔
1407
            } else {
1408
                values.lvv_values.emplace_back(fd.fd_meta);
74,315✔
1409
            }
1410
            values.lvv_values.back().lv_meta.lvm_user_hidden
650,830✔
1411
                = fd.fd_root_meta->lvm_user_hidden;
650,830✔
1412
        }
1413

1414
        log_format::annotate(lf, line_number, sa, values);
30,358✔
1415
    }
1416

1417
    std::optional<size_t> stats_index_for_value(
36✔
1418
        const intern_string_t& name) const override
1419
    {
1420
        for (const auto& blf_field_def : this->blf_field_defs) {
540✔
1421
            if (blf_field_def.fd_meta.lvm_name == name) {
540✔
1422
                if (!blf_field_def.fd_numeric_index) {
36✔
1423
                    break;
×
1424
                }
1425
                return blf_field_def.fd_numeric_index.value();
36✔
1426
            }
1427
        }
1428

1429
        return std::nullopt;
×
1430
    }
1431

1432
    /**
     * Set the user-hidden flag of a field in the shared FIELD_META table.
     *
     * The name LOG_TIME_STR is treated as an alias for the TS field.
     *
     * @return true if the field was found and updated, false otherwise.
     */
    bool hide_field(intern_string_t field_name, bool val) override
    {
        const intern_string_t lookup_name
            = (field_name == LOG_TIME_STR) ? TS : field_name;

        const auto meta_iter = FIELD_META.find(lookup_name);
        const bool known = meta_iter != FIELD_META.end();

        if (known) {
            meta_iter->second.lvm_user_hidden = val;
        }

        return known;
    }
1447

1448
    std::map<intern_string_t, logline_value_meta> get_field_states() override
97✔
1449
    {
1450
        std::map<intern_string_t, logline_value_meta> retval;
97✔
1451

1452
        for (const auto& fd : FIELD_META) {
213✔
1453
            retval.emplace(fd.first, fd.second);
116✔
1454
        }
1455

1456
        return retval;
97✔
1457
    }
×
1458

1459
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
25✔
1460
    {
1461
        auto retval = std::make_shared<bro_log_format>(*this);
25✔
1462

1463
        retval->lf_specialized = true;
25✔
1464
        for (auto& fd : retval->blf_field_defs) {
734✔
1465
            fd.fd_meta.lvm_format = retval.get();
709✔
1466
        }
1467
        return retval;
50✔
1468
    }
25✔
1469

1470
    class bro_log_table : public log_format_vtab_impl {
1471
    public:
1472
        explicit bro_log_table(std::shared_ptr<const log_format> format)
23✔
1473
            : log_format_vtab_impl(format),
23✔
1474
              blt_format(dynamic_cast<const bro_log_format*>(format.get()))
23✔
1475
        {
1476
        }
23✔
1477

1478
        void get_columns(std::vector<vtab_column>& cols) const override
32✔
1479
        {
1480
            for (const auto& fd : this->blt_format->blf_field_defs) {
944✔
1481
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
912✔
1482
                    fd.fd_meta.lvm_kind);
912✔
1483

1484
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
912✔
1485
                                  type_pair.first,
1486
                                  fd.fd_collator,
912✔
1487
                                  false,
1,824✔
1488
                                  "",
1489
                                  type_pair.second);
1490
            }
1491
        }
32✔
1492

1493
        void get_foreign_keys(
11✔
1494
            std::unordered_set<std::string>& keys_inout) const override
1495
        {
1496
            this->log_vtab_impl::get_foreign_keys(keys_inout);
11✔
1497

1498
            for (const auto& fd : this->blt_format->blf_field_defs) {
322✔
1499
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
311✔
1500
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
136✔
1501
                }
1502
            }
1503
        }
11✔
1504

1505
        const bro_log_format* blt_format;
1506
    };
1507

1508
    /**
     * @return The function-local map of tables created so far, keyed by
     * format name.
     */
    static std::map<intern_string_t, std::shared_ptr<bro_log_table>>&
    get_tables()
    {
        static std::map<intern_string_t, std::shared_ptr<bro_log_table>>
            registry;

        return registry;
    }
1515

1516
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
819✔
1517
    {
1518
        if (this->blf_format_name.empty()) {
819✔
1519
            return nullptr;
796✔
1520
        }
1521

1522
        std::shared_ptr<bro_log_table> retval = nullptr;
23✔
1523

1524
        auto& tables = get_tables();
23✔
1525
        const auto iter = tables.find(this->blf_format_name);
23✔
1526
        if (iter == tables.end()) {
23✔
1527
            retval = std::make_shared<bro_log_table>(this->shared_from_this());
23✔
1528
            tables[this->blf_format_name] = retval;
23✔
1529
        }
1530

1531
        return retval;
23✔
1532
    }
23✔
1533

1534
    void get_subline(const log_format_file_state& lffs,
35,126✔
1535
                     const logline& ll,
1536
                     shared_buffer_ref& sbr,
1537
                     subline_options opts) override
1538
    {
1539
    }
35,126✔
1540

1541
    intern_string_t blf_format_name;
1542
    intern_string_t blf_separator;
1543
    intern_string_t blf_set_separator;
1544
    intern_string_t blf_empty_field;
1545
    intern_string_t blf_unset_field;
1546
    std::vector<field_def> blf_field_defs;
1547
};
1548

1549
std::unordered_map<const intern_string_t, logline_value_meta>
1550
    bro_log_format::FIELD_META;
1551

1552
const intern_string_t bro_log_format::TS = intern_string::lookup("bro_ts");
1553
const intern_string_t bro_log_format::DURATION
1554
    = intern_string::lookup("bro_duration");
1555

1556
struct ws_separated_string {
1557
    const char* ss_str;
1558
    size_t ss_len;
1559

1560
    explicit ws_separated_string(const char* str = nullptr, size_t len = -1)
30,720✔
1561
        : ss_str(str), ss_len(len)
30,720✔
1562
    {
1563
    }
30,720✔
1564

1565
    struct iterator {
1566
        enum class state_t {
1567
            NORMAL,
1568
            QUOTED,
1569
        };
1570

1571
        const ws_separated_string& i_parent;
1572
        const char* i_pos;
1573
        const char* i_next_pos;
1574
        size_t i_index{0};
1575
        state_t i_state{state_t::NORMAL};
1576

1577
        iterator(const ws_separated_string& ss, const char* pos)
79,647✔
1578
            : i_parent(ss), i_pos(pos), i_next_pos(pos)
79,647✔
1579
        {
1580
            this->update();
79,647✔
1581
        }
79,647✔
1582

1583
        void update()
125,793✔
1584
        {
1585
            const auto& ss = this->i_parent;
125,793✔
1586
            bool done = false;
125,793✔
1587

1588
            while (!done && this->i_next_pos < (ss.ss_str + ss.ss_len)) {
988,295✔
1589
                switch (this->i_state) {
862,502✔
1590
                    case state_t::NORMAL:
854,721✔
1591
                        if (*this->i_next_pos == '"') {
854,721✔
1592
                            this->i_state = state_t::QUOTED;
322✔
1593
                        } else if (isspace(*this->i_next_pos)) {
854,399✔
1594
                            done = true;
66,563✔
1595
                        }
1596
                        break;
854,721✔
1597
                    case state_t::QUOTED:
7,781✔
1598
                        if (*this->i_next_pos == '"') {
7,781✔
1599
                            this->i_state = state_t::NORMAL;
321✔
1600
                        }
1601
                        break;
7,781✔
1602
                }
1603
                if (!done) {
862,502✔
1604
                    this->i_next_pos += 1;
795,939✔
1605
                }
1606
            }
1607
        }
125,793✔
1608

1609
        iterator& operator++()
46,146✔
1610
        {
1611
            const auto& ss = this->i_parent;
46,146✔
1612

1613
            this->i_pos = this->i_next_pos;
46,146✔
1614
            while (this->i_pos < (ss.ss_str + ss.ss_len)
46,146✔
1615
                   && isspace(*this->i_pos))
89,675✔
1616
            {
1617
                this->i_pos += 1;
43,529✔
1618
                this->i_next_pos += 1;
43,529✔
1619
            }
1620
            this->update();
46,146✔
1621
            this->i_index += 1;
46,146✔
1622

1623
            return *this;
46,146✔
1624
        }
1625

1626
        string_fragment operator*()
71,432✔
1627
        {
1628
            const auto& ss = this->i_parent;
71,432✔
1629
            int end = this->i_next_pos - ss.ss_str;
71,432✔
1630

1631
            return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end);
71,432✔
1632
        }
1633

1634
        bool operator==(const iterator& other) const
48,927✔
1635
        {
1636
            return (&this->i_parent == &other.i_parent)
48,927✔
1637
                && (this->i_pos == other.i_pos);
48,927✔
1638
        }
1639

1640
        bool operator!=(const iterator& other) const
46,065✔
1641
        {
1642
            return !(*this == other);
46,065✔
1643
        }
1644

1645
        size_t index() const { return this->i_index; }
86,567✔
1646
    };
1647

1648
    iterator begin() { return {*this, this->ss_str}; }
30,720✔
1649

1650
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
48,927✔
1651
};
1652

1653
class w3c_log_format : public log_format {
1654
public:
1655
    static const intern_string_t F_DATE;
1656
    static const intern_string_t F_TIME;
1657

1658
    struct field_def {
1659
        const intern_string_t fd_name;
1660
        logline_value_meta fd_meta;
1661
        logline_value_meta* fd_root_meta{nullptr};
1662
        std::string fd_collator;
1663
        std::optional<size_t> fd_numeric_index;
1664

1665
        explicit field_def(const intern_string_t name)
22✔
1666
            : fd_name(name), fd_meta(intern_string::lookup(sql_safe_ident(
44✔
1667
                                         name.to_string_fragment())),
44✔
1668
                                     value_kind_t::VALUE_TEXT)
22✔
1669
        {
1670
        }
22✔
1671

1672
        field_def(const intern_string_t name, logline_value_meta meta)
71✔
1673
            : fd_name(name), fd_meta(meta)
71✔
1674
        {
1675
        }
71✔
1676

1677
        field_def(size_t col,
12,240✔
1678
                  const char* name,
1679
                  value_kind_t kind,
1680
                  bool ident = false,
1681
                  bool foreign_key = false,
1682
                  std::string coll = "")
1683
            : fd_name(intern_string::lookup(name)),
24,480✔
1684
              fd_meta(
24,480✔
1685
                  intern_string::lookup(sql_safe_ident(string_fragment(name))),
24,480✔
1686
                  kind,
1687
                  logline_value_meta::table_column{col}),
12,240✔
1688
              fd_collator(std::move(coll))
12,240✔
1689
        {
1690
            this->fd_meta.lvm_identifier = ident;
12,240✔
1691
            this->fd_meta.lvm_foreign_key = foreign_key;
12,240✔
1692
        }
12,240✔
1693

1694
        field_def& with_kind(value_kind_t kind,
1695
                             bool identifier = false,
1696
                             const std::string& collator = "")
1697
        {
1698
            this->fd_meta.lvm_kind = kind;
1699
            this->fd_meta.lvm_identifier = identifier;
1700
            this->fd_collator = collator;
1701
            return *this;
1702
        }
1703

1704
        field_def& with_numeric_index(int index)
51✔
1705
        {
1706
            this->fd_numeric_index = index;
51✔
1707
            return *this;
51✔
1708
        }
1709
    };
1710

1711
    static std::unordered_map<const intern_string_t, logline_value_meta>
1712
        FIELD_META;
1713

1714
    struct field_to_struct_t {
1715
        field_to_struct_t(const char* prefix, const char* struct_name)
3,060✔
1716
            : fs_prefix(prefix),
3,060✔
1717
              fs_struct_name(intern_string::lookup(struct_name))
6,120✔
1718
        {
1719
        }
3,060✔
1720

1721
        const char* fs_prefix;
1722
        intern_string_t fs_struct_name;
1723
    };
1724

1725
    static const std::array<field_def, 16>& get_known_fields()
782✔
1726
    {
1727
        static size_t KNOWN_FIELD_INDEX = 0;
1728
        static const std::array<field_def, 16> RETVAL = {
1729
            field_def{
1730
                KNOWN_FIELD_INDEX++,
1731
                "cs-method",
1732
                value_kind_t::VALUE_TEXT,
1733
                true,
1734
            },
1735
            {
1736
                KNOWN_FIELD_INDEX++,
1737
                "c-ip",
1738
                value_kind_t::VALUE_TEXT,
1739
                true,
1740
                false,
1741
                "ipaddress",
1742
            },
1743
            {
1744
                KNOWN_FIELD_INDEX++,
1745
                "cs-bytes",
1746
                value_kind_t::VALUE_INTEGER,
1747
                false,
1748
            },
1749
            {
1750
                KNOWN_FIELD_INDEX++,
1751
                "cs-host",
1752
                value_kind_t::VALUE_TEXT,
1753
                true,
1754
            },
1755
            {
1756
                KNOWN_FIELD_INDEX++,
1757
                "cs-uri-stem",
1758
                value_kind_t::VALUE_TEXT,
1759
                true,
1760
                false,
1761
                "naturalnocase",
1762
            },
1763
            {
1764
                KNOWN_FIELD_INDEX++,
1765
                "cs-uri-query",
1766
                value_kind_t::VALUE_TEXT,
1767
                false,
1768
            },
1769
            {
1770
                KNOWN_FIELD_INDEX++,
1771
                "cs-username",
1772
                value_kind_t::VALUE_TEXT,
1773
                false,
1774
            },
1775
            {
1776
                KNOWN_FIELD_INDEX++,
1777
                "cs-version",
1778
                value_kind_t::VALUE_TEXT,
1779
                true,
1780
            },
1781
            {
1782
                KNOWN_FIELD_INDEX++,
1783
                "s-ip",
1784
                value_kind_t::VALUE_TEXT,
1785
                true,
1786
                false,
1787
                "ipaddress",
1788
            },
1789
            {
1790
                KNOWN_FIELD_INDEX++,
1791
                "s-port",
1792
                value_kind_t::VALUE_INTEGER,
1793
                true,
1794
            },
1795
            {
1796
                KNOWN_FIELD_INDEX++,
1797
                "s-computername",
1798
                value_kind_t::VALUE_TEXT,
1799
                true,
1800
            },
1801
            {
1802
                KNOWN_FIELD_INDEX++,
1803
                "s-sitename",
1804
                value_kind_t::VALUE_TEXT,
1805
                true,
1806
            },
1807
            {
1808
                KNOWN_FIELD_INDEX++,
1809
                "sc-bytes",
1810
                value_kind_t::VALUE_INTEGER,
1811
                false,
1812
            },
1813
            {
1814
                KNOWN_FIELD_INDEX++,
1815
                "sc-status",
1816
                value_kind_t::VALUE_INTEGER,
1817
                false,
1818
                true,
1819
            },
1820
            {
1821
                KNOWN_FIELD_INDEX++,
1822
                "sc-substatus",
1823
                value_kind_t::VALUE_INTEGER,
1824
                false,
1825
            },
1826
            {
1827
                KNOWN_FIELD_INDEX++,
1828
                "time-taken",
1829
                value_kind_t::VALUE_FLOAT,
1830
                false,
1831
            },
1832
        };
2,312✔
1833

1834
        return RETVAL;
782✔
1835
    }
1836

1837
    static const std::array<field_to_struct_t, 4>& get_known_struct_fields()
779✔
1838
    {
1839
        static const std::array<field_to_struct_t, 4> RETVAL = {
1840
            field_to_struct_t{"cs(", "cs_headers"},
1841
            {"sc(", "sc_headers"},
1842
            {"rs(", "rs_headers"},
1843
            {"sr(", "sr_headers"},
1844
        };
779✔
1845

1846
        return RETVAL;
779✔
1847
    }
1848

1849
    w3c_log_format()
917✔
1850
    {
917✔
1851
        this->lf_multiline = false;
917✔
1852
        this->lf_is_self_describing = true;
917✔
1853
        this->lf_time_ordered = false;
917✔
1854
        this->lf_structured = true;
917✔
1855
    }
917✔
1856

1857
    /**
     * @return wlf_format_name when it has been set, otherwise the default
     * name "w3c_log".
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t DEFAULT_NAME(
            intern_string::lookup("w3c_log"));

        if (this->wlf_format_name.empty()) {
            return DEFAULT_NAME;
        }

        return this->wlf_format_name;
    }
1863

1864
    void clear() override
18,575✔
1865
    {
1866
        this->log_format::clear();
18,575✔
1867
        this->wlf_time_scanner.clear();
18,575✔
1868
        this->wlf_format_name.clear();
18,575✔
1869
        this->wlf_field_defs.clear();
18,575✔
1870
    }
18,575✔
1871

UNCOV
1872
    std::vector<logline_value_meta> get_value_metadata() const override
×
1873
    {
UNCOV
1874
        std::vector<logline_value_meta> retval;
×
1875

UNCOV
1876
        for (const auto& fd : this->wlf_field_defs) {
×
UNCOV
1877
            retval.emplace_back(fd.fd_meta);
×
1878
        }
UNCOV
1879
        return retval;
×
UNCOV
1880
    }
×
1881

1882
    scan_result_t scan_int(std::vector<logline>& dst,
1,315✔
1883
                           const line_info& li,
1884
                           shared_buffer_ref& sbr,
1885
                           scan_batch_context& sbc)
1886
    {
1887
        static const intern_string_t F_DATE_LOCAL
1888
            = intern_string::lookup("date-local");
1,349✔
1889
        static const intern_string_t F_DATE_UTC
1890
            = intern_string::lookup("date-UTC");
1,349✔
1891
        static const intern_string_t F_TIME_LOCAL
1892
            = intern_string::lookup("time-local");
1,349✔
1893
        static const intern_string_t F_TIME_UTC
1894
            = intern_string::lookup("time-UTC");
1,349✔
1895
        static const intern_string_t F_STATUS_CODE
1896
            = intern_string::lookup("sc-status");
1,349✔
1897

1898
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,315✔
1899
        timeval date_tv{0, 0}, time_tv{0, 0};
1,315✔
1900
        exttm date_tm, time_tm;
1,315✔
1901
        size_t found_date = 0;
1,315✔
1902
        size_t found_time = 0;
1,315✔
1903
        log_level_t level = LEVEL_INFO;
1,315✔
1904

1905
        sbc.sbc_value_stats.resize(this->wlf_field_defs.size());
1,315✔
1906
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
19,693✔
1907
            if (iter.index() >= this->wlf_field_defs.size()) {
18,588✔
1908
                level = LEVEL_INVALID;
1✔
1909
                break;
1✔
1910
            }
1911

1912
            const auto& fd = this->wlf_field_defs[iter.index()];
18,587✔
1913
            string_fragment sf = *iter;
18,587✔
1914

1915
            if (sf.startswith("#")) {
18,587✔
1916
                if (sf == "#Date:") {
209✔
1917
                    auto sbr_sf_opt
1918
                        = sbr.to_string_fragment().consume_n(sf.length());
53✔
1919

1920
                    if (sbr_sf_opt) {
53✔
1921
                        auto sbr_sf = sbr_sf_opt.value().trim();
53✔
1922
                        date_time_scanner dts;
53✔
1923
                        exttm tm;
53✔
1924
                        timeval tv;
1925

1926
                        if (dts.scan(sbr_sf.data(),
53✔
1927
                                     sbr_sf.length(),
53✔
1928
                                     nullptr,
1929
                                     &tm,
1930
                                     tv))
1931
                        {
1932
                            this->lf_date_time.set_base_time(tv.tv_sec,
52✔
1933
                                                             tm.et_tm);
1934
                            this->wlf_time_scanner.set_base_time(tv.tv_sec,
52✔
1935
                                                                 tm.et_tm);
1936
                        }
1937
                    }
1938
                }
1939
                auto& ll = dst.back();
209✔
1940
                ll.set_level(LEVEL_UNKNOWN);
209✔
1941
                ll.set_ignore(true);
209✔
1942
                return scan_match{2000};
209✔
1943
            }
1944

1945
            sf = sf.trim("\" \t");
18,378✔
1946
            if (F_DATE == fd.fd_name || F_DATE_LOCAL == fd.fd_name
35,694✔
1947
                || F_DATE_UTC == fd.fd_name)
35,694✔
1948
            {
1949
                if (this->lf_date_time.scan(
1,070✔
1950
                        sf.data(), sf.length(), nullptr, &date_tm, date_tv))
1,070✔
1951
                {
1952
                    this->lf_timestamp_flags |= date_tm.et_flags;
1,069✔
1953
                    found_date += 1;
1,069✔
1954
                }
1955
            } else if (F_TIME == fd.fd_name || F_TIME_LOCAL == fd.fd_name
33,523✔
1956
                       || F_TIME_UTC == fd.fd_name)
33,523✔
1957
            {
1958
                if (this->wlf_time_scanner.scan(
1,101✔
1959
                        sf.data(), sf.length(), nullptr, &time_tm, time_tv))
1,101✔
1960
                {
1961
                    this->lf_timestamp_flags |= time_tm.et_flags;
1,101✔
1962
                    found_time += 1;
1,101✔
1963
                }
1964
            } else if (F_STATUS_CODE == fd.fd_name) {
16,207✔
1965
                if (!sf.empty() && sf[0] >= '4') {
1,098✔
1966
                    level = LEVEL_ERROR;
1,018✔
1967
                }
1968
            }
1969

1970
            if (fd.fd_numeric_index) {
18,378✔
1971
                switch (fd.fd_meta.lvm_kind) {
6,402✔
1972
                    case value_kind_t::VALUE_INTEGER:
6,402✔
1973
                    case value_kind_t::VALUE_FLOAT: {
1974
                        auto scan_float_res
1975
                            = scn::scan_value<double>(sf.to_string_view());
6,402✔
1976

1977
                        if (scan_float_res) {
6,402✔
1978
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
6,398✔
1979
                                .add_value(scan_float_res->value());
6,398✔
1980
                        }
1981
                        break;
6,402✔
1982
                    }
UNCOV
1983
                    default:
×
UNCOV
1984
                        break;
×
1985
                }
1986
            }
1987
        }
1988

1989
        if (found_time == 1 && found_date <= 1) {
1,106✔
1990
            auto tm = time_tm;
1,101✔
1991

1992
            if (found_date) {
1,101✔
1993
                tm.et_tm.tm_year = date_tm.et_tm.tm_year;
1,069✔
1994
                tm.et_tm.tm_mday = date_tm.et_tm.tm_mday;
1,069✔
1995
                tm.et_tm.tm_mon = date_tm.et_tm.tm_mon;
1,069✔
1996
                tm.et_tm.tm_wday = date_tm.et_tm.tm_wday;
1,069✔
1997
                tm.et_tm.tm_yday = date_tm.et_tm.tm_yday;
1,069✔
1998
            }
1999

2000
            auto tv = tm.to_timeval();
1,101✔
2001
            if (!this->lf_specialized) {
1,101✔
2002
                for (auto& ll : dst) {
84✔
2003
                    ll.set_time(tv);
70✔
2004
                    ll.set_ignore(true);
70✔
2005
                }
2006
            }
2007
            auto& ll = dst.back();
1,101✔
2008
            ll.set_time(tv);
1,101✔
2009
            ll.set_level(level);
1,101✔
2010
            ll.set_ignore(false);
1,101✔
2011
            return scan_match{2000};
1,101✔
2012
        }
2013

2014
        return scan_no_match{"no header found"};
5✔
2015
    }
2016

2017
    scan_result_t scan(logfile& lf,
14,706✔
2018
                       std::vector<logline>& dst,
2019
                       const line_info& li,
2020
                       shared_buffer_ref& sbr,
2021
                       scan_batch_context& sbc) override
2022
    {
2023
        static const auto* W3C_LOG_NAME = intern_string::lookup("w3c_log");
16,236✔
2024
        static const auto* X_FIELDS_NAME = intern_string::lookup("x_fields");
16,236✔
2025
        static const auto& KNOWN_FIELDS = get_known_fields();
14,706✔
2026
        static const auto& KNOWN_STRUCT_FIELDS = get_known_struct_fields();
14,706✔
2027
        static auto X_FIELDS_IDX = 0;
2028

2029
        if (li.li_partial) {
14,706✔
2030
            return scan_incomplete{};
24✔
2031
        }
2032

2033
        if (dst.size() == 1) {
14,682✔
2034
            auto file_options = lf.get_file_options();
1,435✔
2035

2036
            if (file_options) {
1,435✔
2037
                this->lf_date_time.dts_default_zone
2038
                    = file_options->second.fo_default_zone.pp_value;
60✔
2039
            } else {
2040
                this->lf_date_time.dts_default_zone = nullptr;
1,375✔
2041
            }
2042
        }
1,435✔
2043

2044
        if (!this->wlf_format_name.empty()) {
14,682✔
2045
            return this->scan_int(dst, li, sbr, sbc);
1,296✔
2046
        }
2047

2048
        if (dst.size() < 2 || dst.size() > 20 || sbr.empty()
25,337✔
2049
            || sbr.get_data()[0] == '#')
25,337✔
2050
        {
2051
            return scan_no_match{"no header found"};
9,492✔
2052
        }
2053

2054
        this->clear();
3,894✔
2055

2056
        for (auto line_iter = dst.begin(); line_iter != dst.end(); ++line_iter)
31,833✔
2057
        {
2058
            auto next_read_result = lf.read_line(line_iter);
27,939✔
2059

2060
            if (next_read_result.isErr()) {
27,939✔
UNCOV
2061
                return scan_no_match{"unable to read first line"};
×
2062
            }
2063

2064
            auto line = next_read_result.unwrap();
27,939✔
2065
            ws_separated_string ss(line.get_data(), line.length());
27,939✔
2066
            auto iter = ss.begin();
27,939✔
2067
            const auto directive = *iter;
27,939✔
2068

2069
            if (directive.empty() || directive[0] != '#') {
27,939✔
2070
                continue;
25,077✔
2071
            }
2072

2073
            ++iter;
2,862✔
2074
            if (iter == ss.end()) {
2,862✔
2075
                continue;
39✔
2076
            }
2077

2078
            if (directive == "#Date:") {
2,823✔
2079
                date_time_scanner dts;
12✔
2080
                struct exttm tm;
12✔
2081
                struct timeval tv;
2082

2083
                if (dts.scan(line.get_data_at(directive.length() + 1),
12✔
2084
                             line.length() - directive.length() - 1,
12✔
2085
                             nullptr,
2086
                             &tm,
2087
                             tv))
2088
                {
2089
                    this->lf_date_time.set_base_time(tv.tv_sec, tm.et_tm);
11✔
2090
                    this->wlf_time_scanner.set_base_time(tv.tv_sec, tm.et_tm);
11✔
2091
                }
2092
            } else if (directive == "#Fields:" && this->wlf_field_defs.empty())
2,811✔
2093
            {
2094
                int numeric_count = 0;
19✔
2095

2096
                do {
2097
                    auto sf = (*iter).trim(")");
210✔
2098

2099
                    auto field_iter = std::find_if(
630✔
2100
                        begin(KNOWN_FIELDS),
2101
                        end(KNOWN_FIELDS),
2102
                        [&sf](auto elem) { return sf == elem.fd_name; });
2,400✔
2103
                    if (field_iter != end(KNOWN_FIELDS)) {
420✔
2104
                        this->wlf_field_defs.emplace_back(*field_iter);
117✔
2105
                        auto& fd = this->wlf_field_defs.back();
117✔
2106
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
117✔
2107
                        if (common_iter == FIELD_META.end()) {
117✔
2108
                            auto emp_res = FIELD_META.emplace(
116✔
2109
                                fd.fd_meta.lvm_name, fd.fd_meta);
116✔
2110
                            common_iter = emp_res.first;
116✔
2111
                        }
2112
                        fd.fd_root_meta = &common_iter->second;
117✔
2113
                    } else if (sf.is_one_of("date", "time")) {
93✔
2114
                        this->wlf_field_defs.emplace_back(
44✔
2115
                            intern_string::lookup(sf));
22✔
2116
                        auto& fd = this->wlf_field_defs.back();
22✔
2117
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
22✔
2118
                        if (common_iter == FIELD_META.end()) {
22✔
2119
                            auto emp_res = FIELD_META.emplace(
21✔
2120
                                fd.fd_meta.lvm_name, fd.fd_meta);
21✔
2121
                            common_iter = emp_res.first;
21✔
2122
                        }
2123
                        fd.fd_root_meta = &common_iter->second;
22✔
2124
                    } else {
2125
                        const auto fs_iter = std::find_if(
213✔
2126
                            begin(KNOWN_STRUCT_FIELDS),
2127
                            end(KNOWN_STRUCT_FIELDS),
2128
                            [&sf](auto elem) {
221✔
2129
                                return sf.startswith(elem.fs_prefix);
221✔
2130
                            });
2131
                        if (fs_iter != end(KNOWN_STRUCT_FIELDS)) {
142✔
2132
                            const intern_string_t field_name
2133
                                = intern_string::lookup(sf.substr(3));
21✔
2134
                            this->wlf_field_defs.emplace_back(
21✔
2135
                                field_name,
2136
                                logline_value_meta(
42✔
2137
                                    field_name,
2138
                                    value_kind_t::VALUE_TEXT,
UNCOV
2139
                                    logline_value_meta::table_column{
×
2140
                                        KNOWN_FIELDS.size() + 1
21✔
2141
                                        + std::distance(
63✔
2142
                                            begin(KNOWN_STRUCT_FIELDS),
2143
                                            fs_iter)},
2144
                                    this)
42✔
2145
                                    .with_struct_name(fs_iter->fs_struct_name));
2146
                        } else {
2147
                            const intern_string_t field_name
2148
                                = intern_string::lookup(sf);
50✔
2149
                            this->wlf_field_defs.emplace_back(
50✔
2150
                                field_name,
2151
                                logline_value_meta(
100✔
2152
                                    field_name,
2153
                                    value_kind_t::VALUE_TEXT,
2154
                                    logline_value_meta::table_column{
×
2155
                                        KNOWN_FIELDS.size() + X_FIELDS_IDX},
100✔
2156
                                    this)
100✔
2157
                                    .with_struct_name(X_FIELDS_NAME));
2158
                        }
2159
                    }
2160
                    auto& fd = this->wlf_field_defs.back();
210✔
2161
                    fd.fd_meta.lvm_format = std::make_optional(this);
210✔
2162
                    switch (fd.fd_meta.lvm_kind) {
210✔
2163
                        case value_kind_t::VALUE_FLOAT:
51✔
2164
                        case value_kind_t::VALUE_INTEGER:
2165
                            fd.with_numeric_index(numeric_count);
51✔
2166
                            numeric_count += 1;
51✔
2167
                            break;
51✔
2168
                        default:
159✔
2169
                            break;
159✔
2170
                    }
2171

2172
                    ++iter;
210✔
2173
                } while (iter != ss.end());
210✔
2174

2175
                this->wlf_format_name = W3C_LOG_NAME;
19✔
2176
            }
2177
        }
53,055✔
2178

2179
        if (!this->wlf_format_name.empty() && !this->wlf_field_defs.empty()) {
3,894✔
2180
            return this->scan_int(dst, li, sbr, sbc);
19✔
2181
        }
2182

2183
        this->wlf_format_name.clear();
3,875✔
2184

2185
        return scan_no_match{"no header found"};
3,875✔
2186
    }
2187

2188
    void annotate(logfile* lf,
1,466✔
2189
                  uint64_t line_number,
2190
                  string_attrs_t& sa,
2191
                  logline_value_vector& values) const override
2192
    {
2193
        auto& sbr = values.lvv_sbr;
1,466✔
2194
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,466✔
2195
        std::optional<line_range> date_lr;
1,466✔
2196
        std::optional<line_range> time_lr;
1,466✔
2197

2198
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
26,162✔
2199
            auto sf = *iter;
24,696✔
2200

2201
            if (iter.index() >= this->wlf_field_defs.size()) {
24,696✔
UNCOV
2202
                sa.emplace_back(line_range{sf.sf_begin, -1},
×
UNCOV
2203
                                SA_INVALID.value("extra fields detected"s));
×
UNCOV
2204
                return;
×
2205
            }
2206

2207
            const auto& fd = this->wlf_field_defs[iter.index()];
24,696✔
2208

2209
            if (sf == "-") {
24,696✔
2210
                sf.invalidate();
4,300✔
2211
            }
2212

2213
            auto lr = line_range(sf.sf_begin, sf.sf_end);
24,696✔
2214

2215
            if (lr.is_valid()) {
24,696✔
2216
                if (fd.fd_meta.lvm_name == F_DATE) {
20,396✔
2217
                    date_lr = lr;
1,444✔
2218
                } else if (fd.fd_meta.lvm_name == F_TIME) {
18,952✔
2219
                    time_lr = lr;
1,458✔
2220
                }
2221
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
20,396✔
2222
                if (sf.startswith("\"")) {
20,396✔
2223
                    auto& meta = values.lvv_values.back().lv_meta;
28✔
2224

2225
                    if (meta.lvm_kind == value_kind_t::VALUE_TEXT) {
28✔
2226
                        meta.lvm_kind = value_kind_t::VALUE_W3C_QUOTED;
26✔
2227
                    } else {
2228
                        meta.lvm_kind = value_kind_t::VALUE_NULL;
2✔
2229
                    }
2230
                }
2231
            } else {
2232
                values.lvv_values.emplace_back(fd.fd_meta);
4,300✔
2233
            }
2234
            if (fd.fd_root_meta != nullptr) {
24,696✔
2235
                values.lvv_values.back().lv_meta.lvm_user_hidden
20,318✔
2236
                    = fd.fd_root_meta->lvm_user_hidden;
20,318✔
2237
            }
2238
        }
2239
        if (time_lr) {
1,466✔
2240
            auto ts_lr = time_lr.value();
1,458✔
2241
            if (date_lr) {
1,458✔
2242
                if (date_lr->lr_end + 1 == time_lr->lr_start) {
1,443✔
2243
                    ts_lr.lr_start = date_lr->lr_start;
1,442✔
2244
                    ts_lr.lr_end = time_lr->lr_end;
1,442✔
2245
                }
2246
            }
2247

2248
            sa.emplace_back(ts_lr, L_TIMESTAMP.value());
1,458✔
2249
        }
2250
        log_format::annotate(lf, line_number, sa, values);
1,466✔
2251
    }
2252

UNCOV
2253
    std::optional<size_t> stats_index_for_value(
×
2254
        const intern_string_t& name) const override
2255
    {
UNCOV
2256
        for (const auto& wlf_field_def : this->wlf_field_defs) {
×
UNCOV
2257
            if (wlf_field_def.fd_meta.lvm_name == name) {
×
UNCOV
2258
                if (!wlf_field_def.fd_numeric_index) {
×
UNCOV
2259
                    break;
×
2260
                }
UNCOV
2261
                return wlf_field_def.fd_numeric_index.value();
×
2262
            }
2263
        }
2264

UNCOV
2265
        return std::nullopt;
×
2266
    }
2267

UNCOV
2268
    bool hide_field(const intern_string_t field_name, bool val) override
×
2269
    {
UNCOV
2270
        if (field_name == LOG_TIME_STR) {
×
UNCOV
2271
            auto date_iter = FIELD_META.find(F_DATE);
×
UNCOV
2272
            auto time_iter = FIELD_META.find(F_TIME);
×
UNCOV
2273
            if (date_iter == FIELD_META.end() || time_iter == FIELD_META.end())
×
2274
            {
UNCOV
2275
                return false;
×
2276
            }
UNCOV
2277
            date_iter->second.lvm_user_hidden = val;
×
UNCOV
2278
            time_iter->second.lvm_user_hidden = val;
×
UNCOV
2279
            return true;
×
2280
        }
2281

UNCOV
2282
        auto fd_iter = FIELD_META.find(field_name);
×
UNCOV
2283
        if (fd_iter == FIELD_META.end()) {
×
UNCOV
2284
            return false;
×
2285
        }
2286

UNCOV
2287
        fd_iter->second.lvm_user_hidden = val;
×
2288

UNCOV
2289
        return true;
×
2290
    }
2291

2292
    std::map<intern_string_t, logline_value_meta> get_field_states() override
97✔
2293
    {
2294
        std::map<intern_string_t, logline_value_meta> retval;
97✔
2295

2296
        for (const auto& fd : FIELD_META) {
153✔
2297
            retval.emplace(fd.first, fd.second);
56✔
2298
        }
2299

2300
        return retval;
97✔
UNCOV
2301
    }
×
2302

2303
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
14✔
2304
    {
2305
        auto retval = std::make_shared<w3c_log_format>(*this);
14✔
2306

2307
        retval->lf_specialized = true;
14✔
2308
        return retval;
28✔
2309
    }
14✔
2310

2311
    class w3c_log_table : public log_format_vtab_impl {
2312
    public:
2313
        explicit w3c_log_table(std::shared_ptr<const log_format> format)
11✔
2314
            : log_format_vtab_impl(format)
11✔
2315
        {
2316
        }
11✔
2317

2318
        void get_columns(std::vector<vtab_column>& cols) const override
14✔
2319
        {
2320
            for (const auto& fd : get_known_fields()) {
238✔
2321
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
224✔
2322
                    fd.fd_meta.lvm_kind);
224✔
2323

2324
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
224✔
2325
                                  type_pair.first,
2326
                                  fd.fd_collator,
224✔
2327
                                  false,
448✔
2328
                                  "",
2329
                                  type_pair.second);
2330
            }
2331
            cols.emplace_back("x_fields");
14✔
2332
            cols.back().with_comment(
28✔
2333
                "A JSON-object that contains fields that are not first-class "
2334
                "columns");
2335
            for (const auto& fs : get_known_struct_fields()) {
70✔
2336
                cols.emplace_back(fs.fs_struct_name.to_string());
56✔
2337
            }
2338
        }
14✔
2339

2340
        void get_foreign_keys(
3✔
2341
            std::unordered_set<std::string>& keys_inout) const override
2342
        {
2343
            this->log_vtab_impl::get_foreign_keys(keys_inout);
3✔
2344

2345
            for (const auto& fd : get_known_fields()) {
51✔
2346
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
48✔
2347
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
30✔
2348
                }
2349
            }
2350
        }
3✔
2351
    };
2352

2353
    static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>&
2354
    get_tables()
11✔
2355
    {
2356
        static std::map<intern_string_t, std::shared_ptr<w3c_log_table>> retval;
11✔
2357

2358
        return retval;
11✔
2359
    }
2360

2361
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
807✔
2362
    {
2363
        if (this->wlf_format_name.empty()) {
807✔
2364
            return nullptr;
796✔
2365
        }
2366

2367
        std::shared_ptr<w3c_log_table> retval = nullptr;
11✔
2368

2369
        auto& tables = get_tables();
11✔
2370
        const auto iter = tables.find(this->wlf_format_name);
11✔
2371
        if (iter == tables.end()) {
11✔
2372
            retval = std::make_shared<w3c_log_table>(this->shared_from_this());
11✔
2373
            tables[this->wlf_format_name] = retval;
11✔
2374
        }
2375

2376
        return retval;
11✔
2377
    }
11✔
2378

2379
    /**
     * No-op override: the body is empty, so none of the arguments are read
     * and `sbr` is left untouched.
     */
    void get_subline(const log_format_file_state& lffs,
1,623✔
2380
                     const logline& ll,
2381
                     shared_buffer_ref& sbr,
2382
                     subline_options opts) override
2383
    {
2384
    }
1,623✔
2385

2386
    // Date/time scanner owned by this format.
    // NOTE(review): its use is not visible in this part of the file.
    date_time_scanner wlf_time_scanner;
2387
    // Set to W3C_LOG_NAME once a header has been parsed by scan() and
    // cleared when no header is found; get_vtab_impl() returns nullptr
    // while this is empty.
    intern_string_t wlf_format_name;
2388
    // Field definitions collected by scan(); annotate() indexes this by
    // column position.
    std::vector<field_def> wlf_field_defs;
2389
};
2390

2391
// Definition of the static field-metadata table; hide_field() updates the
// lvm_user_hidden flag of its entries and get_field_states() copies it.
std::unordered_map<const intern_string_t, logline_value_meta>
2392
    w3c_log_format::FIELD_META;
2393

2394
// Interned names of the "date" and "time" fields, used to locate the
// columns that make up the timestamp.
const intern_string_t w3c_log_format::F_DATE = intern_string::lookup("date");
2395
const intern_string_t w3c_log_format::F_TIME = intern_string::lookup("time");
2396

2397
struct logfmt_pair_handler {
2398
    explicit logfmt_pair_handler(date_time_scanner& dts) : lph_dt_scanner(dts)
14,681✔
2399
    {
2400
    }
14,681✔
2401

2402
    log_format::scan_result_t process_value(const string_fragment& value_frag)
4,267✔
2403
    {
2404
        if (this->lph_key_frag.is_one_of(
4,267✔
2405
                "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
2406
        {
2407
            if (!this->lph_dt_scanner.scan(value_frag.data(),
49✔
2408
                                           value_frag.length(),
49✔
2409
                                           nullptr,
2410
                                           &this->lph_time_tm,
2411
                                           this->lph_tv))
49✔
2412
            {
2413
                return log_format::scan_no_match{
12✔
2414
                    "timestamp value did not parse correctly"};
12✔
2415
            }
2416
            char buf[1024];
2417
            this->lph_dt_scanner.ftime(
37✔
2418
                buf, sizeof(buf), nullptr, this->lph_time_tm);
37✔
2419
            this->lph_found_time += 1;
37✔
2420
        } else if (this->lph_key_frag.is_one_of("level"_frag, "lvl"_frag)) {
4,218✔
2421
            this->lph_level
2422
                = string2level(value_frag.data(), value_frag.length());
46✔
2423
        }
2424
        return log_format::scan_match{};
4,255✔
2425
    }
2426

2427
    date_time_scanner& lph_dt_scanner;
2428
    size_t lph_found_time{0};
2429
    exttm lph_time_tm;
2430
    timeval lph_tv{0, 0};
2431
    log_level_t lph_level{log_level_t::LEVEL_INFO};
2432
    string_fragment lph_key_frag{""};
2433
};
2434

2435
class logfmt_format : public log_format {
2436
public:
2437
    const intern_string_t get_name() const override
18,093✔
2438
    {
2439
        const static intern_string_t NAME = intern_string::lookup("logfmt_log");
19,915✔
2440

2441
        return NAME;
18,093✔
2442
    }
2443

2444
    class logfmt_log_table : public log_format_vtab_impl {
2445
    public:
2446
        logfmt_log_table(std::shared_ptr<const log_format> format)
796✔
2447
            : log_format_vtab_impl(format)
796✔
2448
        {
2449
        }
796✔
2450

2451
        void get_columns(std::vector<vtab_column>& cols) const override
797✔
2452
        {
2453
            static const auto FIELDS = std::string("fields");
2,389✔
2454

2455
            cols.emplace_back(FIELDS);
797✔
2456
        }
797✔
2457
    };
2458

2459
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
796✔
2460
    {
2461
        static auto retval
2462
            = std::make_shared<logfmt_log_table>(this->shared_from_this());
796✔
2463

2464
        return retval;
796✔
2465
    }
2466

2467
    scan_result_t scan(logfile& lf,
14,681✔
2468
                       std::vector<logline>& dst,
2469
                       const line_info& li,
2470
                       shared_buffer_ref& sbr,
2471
                       scan_batch_context& sbc) override
2472
    {
2473
        auto p = logfmt::parser(sbr.to_string_fragment());
14,681✔
2474
        scan_result_t retval = scan_no_match{};
14,681✔
2475
        bool done = false;
14,681✔
2476
        logfmt_pair_handler lph(this->lf_date_time);
14,681✔
2477

2478
        if (dst.size() == 1) {
14,681✔
2479
            auto file_options = lf.get_file_options();
1,438✔
2480

2481
            if (file_options) {
1,438✔
2482
                this->lf_date_time.dts_default_zone
2483
                    = file_options->second.fo_default_zone.pp_value;
60✔
2484
            } else {
2485
                this->lf_date_time.dts_default_zone = nullptr;
1,378✔
2486
            }
2487
        }
1,438✔
2488

2489
        while (!done) {
51,634✔
2490
            auto parse_result = p.step();
36,953✔
2491

2492
            auto value_res = parse_result.match(
UNCOV
2493
                [&done](const logfmt::parser::end_of_input&) -> scan_result_t {
×
2494
                    done = true;
14,273✔
2495
                    return scan_match{};
14,273✔
2496
                },
UNCOV
2497
                [](const string_fragment&) -> scan_result_t {
×
2498
                    return scan_incomplete{};
18,017✔
2499
                },
UNCOV
2500
                [&lph](const logfmt::parser::kvpair& kvp) -> scan_result_t {
×
2501
                    lph.lph_key_frag = kvp.first;
4,267✔
2502

2503
                    return kvp.second.match(
UNCOV
2504
                        [](const logfmt::parser::bool_value& bv)
×
UNCOV
2505
                            -> scan_result_t { return scan_match{}; },
×
UNCOV
2506
                        [&lph](const logfmt::parser::float_value& fv)
×
2507
                            -> scan_result_t {
2508
                            return lph.process_value(fv.fv_str_value);
5✔
2509
                        },
UNCOV
2510
                        [&lph](const logfmt::parser::int_value& iv)
×
2511
                            -> scan_result_t {
2512
                            return lph.process_value(iv.iv_str_value);
112✔
2513
                        },
UNCOV
2514
                        [&lph](const logfmt::parser::quoted_value& qv)
×
2515
                            -> scan_result_t {
2516
                            auto_mem<yajl_handle_t> handle(yajl_free);
353✔
2517
                            yajl_callbacks cb;
2518
                            scan_result_t retval;
353✔
2519

2520
                            memset(&cb, 0, sizeof(cb));
353✔
2521
                            handle = yajl_alloc(&cb, nullptr, &lph);
353✔
2522
                            cb.yajl_string = +[](void* ctx,
706✔
2523
                                                 const unsigned char* str,
2524
                                                 size_t len,
2525
                                                 yajl_string_props_t*) -> int {
2526
                                auto& lph = *((logfmt_pair_handler*) ctx);
353✔
2527
                                string_fragment value_frag{str, 0, (int) len};
353✔
2528

2529
                                auto value_res = lph.process_value(value_frag);
353✔
2530
                                return value_res.is<scan_match>();
706✔
2531
                            };
706✔
2532

2533
                            if (yajl_parse(
353✔
2534
                                    handle,
2535
                                    (const unsigned char*) qv.qv_value.data(),
353✔
2536
                                    qv.qv_value.length())
353✔
2537
                                    != yajl_status_ok
2538
                                || yajl_complete_parse(handle)
353✔
2539
                                    != yajl_status_ok)
2540
                            {
UNCOV
2541
                                log_debug("json parsing failed");
×
2542
                                string_fragment unq_frag{
UNCOV
2543
                                    qv.qv_value.sf_string,
×
UNCOV
2544
                                    qv.qv_value.sf_begin + 1,
×
UNCOV
2545
                                    qv.qv_value.sf_end - 1,
×
2546
                                };
2547

UNCOV
2548
                                return lph.process_value(unq_frag);
×
2549
                            }
2550

2551
                            return scan_match{};
353✔
2552
                        },
353✔
2553
                        [&lph](const logfmt::parser::unquoted_value& uv)
4,267✔
2554
                            -> scan_result_t {
2555
                            return lph.process_value(uv.uv_value);
3,797✔
2556
                        });
8,534✔
2557
                },
UNCOV
2558
                [](const logfmt::parser::error& err) -> scan_result_t {
×
2559
                    // log_error("logfmt parse error: %s", err.e_msg.c_str());
2560
                    return scan_no_match{};
396✔
2561
                });
36,953✔
2562
            if (value_res.is<scan_no_match>()) {
36,953✔
2563
                retval = value_res;
408✔
2564
                done = true;
408✔
2565
            }
2566
        }
36,953✔
2567

2568
        if (lph.lph_found_time == 1) {
14,681✔
2569
            this->lf_timestamp_flags = lph.lph_time_tm.et_flags;
37✔
2570
            auto& ll = dst.back();
37✔
2571
            ll.set_time(lph.lph_tv);
37✔
2572
            ll.set_level(lph.lph_level);
37✔
2573
            retval = scan_match{500};
37✔
2574
        }
2575

2576
        return retval;
29,362✔
UNCOV
2577
    }
×
2578

2579
    void annotate(logfile* lf,
16✔
2580
                  uint64_t line_number,
2581
                  string_attrs_t& sa,
2582
                  logline_value_vector& values) const override
2583
    {
2584
        static const intern_string_t FIELDS_NAME
2585
            = intern_string::lookup("fields");
22✔
2586

2587
        auto& sbr = values.lvv_sbr;
16✔
2588
        auto p = logfmt::parser(sbr.to_string_fragment());
16✔
2589
        auto done = false;
16✔
2590
        size_t found_body = 0;
16✔
2591

2592
        while (!done) {
133✔
2593
            auto parse_result = p.step();
117✔
2594

2595
            done = parse_result.match(
234✔
2596
                [](const logfmt::parser::end_of_input&) { return true; },
16✔
UNCOV
2597
                [](const string_fragment&) { return false; },
×
UNCOV
2598
                [this, &sa, &values, &found_body](
×
2599
                    const logfmt::parser::kvpair& kvp) {
2600
                    auto value_frag = kvp.second.match(
101✔
UNCOV
2601
                        [this, &kvp, &values](
×
2602
                            const logfmt::parser::bool_value& bv) {
UNCOV
2603
                            auto lvm = logline_value_meta{intern_string::lookup(
×
UNCOV
2604
                                                              kvp.first),
×
2605
                                                          value_kind_t::
2606
                                                              VALUE_INTEGER,
2607
                                                          logline_value_meta::
UNCOV
2608
                                                              table_column{0},
×
UNCOV
2609
                                                          (log_format*) this}
×
UNCOV
2610
                                           .with_struct_name(FIELDS_NAME);
×
UNCOV
2611
                            values.lvv_values.emplace_back(lvm, bv.bv_value);
×
UNCOV
2612
                            values.lvv_values.back().lv_origin
×
UNCOV
2613
                                = to_line_range(bv.bv_str_value);
×
2614

UNCOV
2615
                            return bv.bv_str_value;
×
UNCOV
2616
                        },
×
UNCOV
2617
                        [this, &kvp, &values](
×
2618
                            const logfmt::parser::int_value& iv) {
UNCOV
2619
                            auto lvm = logline_value_meta{intern_string::lookup(
×
UNCOV
2620
                                                              kvp.first),
×
2621
                                                          value_kind_t::
2622
                                                              VALUE_INTEGER,
2623
                                                          logline_value_meta::
UNCOV
2624
                                                              table_column{0},
×
UNCOV
2625
                                                          (log_format*) this}
×
UNCOV
2626
                                           .with_struct_name(FIELDS_NAME);
×
UNCOV
2627
                            values.lvv_values.emplace_back(lvm, iv.iv_value);
×
UNCOV
2628
                            values.lvv_values.back().lv_origin
×
UNCOV
2629
                                = to_line_range(iv.iv_str_value);
×
UNCOV
2630
                            return iv.iv_str_value;
×
UNCOV
2631
                        },
×
2632
                        [this, &kvp, &values](
101✔
2633
                            const logfmt::parser::float_value& fv) {
UNCOV
2634
                            auto lvm = logline_value_meta{intern_string::lookup(
×
UNCOV
2635
                                                              kvp.first),
×
2636
                                                          value_kind_t::
2637
                                                              VALUE_INTEGER,
2638
                                                          logline_value_meta::
UNCOV
2639
                                                              table_column{0},
×
UNCOV
2640
                                                          (log_format*) this}
×
UNCOV
2641
                                           .with_struct_name(FIELDS_NAME);
×
UNCOV
2642
                            values.lvv_values.emplace_back(lvm, fv.fv_value);
×
UNCOV
2643
                            values.lvv_values.back().lv_origin
×
UNCOV
2644
                                = to_line_range(fv.fv_str_value);
×
2645

UNCOV
2646
                            return fv.fv_str_value;
×
UNCOV
2647
                        },
×
UNCOV
2648
                        [](const logfmt::parser::quoted_value& qv) {
×
2649
                            return qv.qv_value;
31✔
2650
                        },
UNCOV
2651
                        [](const logfmt::parser::unquoted_value& uv) {
×
2652
                            return uv.uv_value;
70✔
2653
                        });
2654
                    auto value_lr = to_line_range(value_frag);
101✔
2655

2656
                    auto known_field = false;
101✔
2657
                    if (kvp.first.is_one_of(
101✔
2658
                            "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
2659
                    {
2660
                        sa.emplace_back(value_lr, L_TIMESTAMP.value());
16✔
2661
                        known_field = true;
16✔
2662
                    } else if (kvp.first.is_one_of("level"_frag, "lvl"_frag)) {
85✔
2663
                        sa.emplace_back(value_lr, L_LEVEL.value());
16✔
2664
                        known_field = true;
16✔
2665
                    } else if (kvp.first.is_one_of("msg"_frag,
69✔
2666
                                                   "message"_frag)) {
2667
                        sa.emplace_back(value_lr, SA_BODY.value());
16✔
2668
                        found_body += 1;
16✔
2669
                    } else if (kvp.second.is<logfmt::parser::quoted_value>()
53✔
2670
                               || kvp.second
102✔
2671
                                      .is<logfmt::parser::unquoted_value>())
49✔
2672
                    {
2673
                        auto vkind = value_frag.startswith("\"")
53✔
2674
                            ? value_kind_t::VALUE_JSON
53✔
2675
                            : value_kind_t::VALUE_TEXT;
53✔
2676
                        auto lvm = logline_value_meta{
2677
                            intern_string::lookup(kvp.first),
53✔
2678
                            vkind,
UNCOV
2679
                            logline_value_meta::table_column{0},
×
UNCOV
2680
                            (log_format*) this,
×
2681
                        };
53✔
2682
                        lvm.with_struct_name(FIELDS_NAME);
53✔
2683
                        values.lvv_values.emplace_back(lvm, value_frag);
53✔
2684
                        values.lvv_values.back().lv_origin = value_lr;
53✔
2685
                    }
53✔
2686
                    if (known_field) {
101✔
2687
                        auto key_with_eq = kvp.first;
32✔
2688
                        key_with_eq.sf_end += 1;
32✔
2689
                        sa.emplace_back(to_line_range(key_with_eq),
32✔
2690
                                        SA_REPLACED.value());
64✔
2691
                    } else {
2692
                        sa.emplace_back(to_line_range(kvp.first),
69✔
2693
                                        VC_ROLE.value(role_t::VCR_OBJECT_KEY));
138✔
2694
                    }
2695
                    return false;
101✔
2696
                },
2697
                [line_number, &sbr](const logfmt::parser::error& err) {
117✔
UNCOV
2698
                    log_error(
×
2699
                        "bad line %.*s", (int) sbr.length(), sbr.get_data());
UNCOV
2700
                    log_error("%lld:logfmt parse error: %s",
×
2701
                              line_number,
2702
                              err.e_msg.c_str());
UNCOV
2703
                    return true;
×
2704
                });
2705
        }
117✔
2706

2707
        if (found_body == 1) {
16✔
2708
            sa.emplace_back(line_range::empty_at(sbr.length()),
16✔
2709
                            SA_BODY.value());
32✔
2710
        }
2711

2712
        log_format::annotate(lf, line_number, sa, values);
16✔
2713
    }
16✔
2714

2715
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
2716
    {
2717
        auto retval = std::make_shared<logfmt_format>(*this);
6✔
2718

2719
        retval->lf_specialized = true;
6✔
2720
        return retval;
12✔
2721
    }
6✔
2722
};
2723

2724
// Binds the format implementations defined in this file to log_format
// through the injector; the binding runs when this static is initialized.
static auto format_binder = injector::bind_multiple<log_format>()
2725
                                .add<logfmt_format>()
2726
                                .add<bro_log_format>()
2727
                                .add<w3c_log_format>()
2728
                                .add<metrics_log_format>()
2729
                                .add<o1_generic_log_format>()
2730
                                .add<piper_log_format>();
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc