• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 25603066199-3032

09 May 2026 02:05PM UTC coverage: 70.204% (+0.04%) from 70.162%
25603066199-3032

push

github

tstack
[tabular] multi-line support

238 of 261 new or added lines in 9 files covered. (91.19%)

10 existing lines in 3 files now uncovered.

57551 of 81977 relevant lines covered (70.2%)

634088.25 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.89
/src/log_format_impls.cc
1
/**
2
 * Copyright (c) 2007-2017, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file log_format_impls.cc
30
 */
31

32
#include <algorithm>
33
#include <chrono>
34
#include <memory>
35
#include <utility>
36

37
#include "log_format.hh"
38

39
#include <stdio.h>
40

41
#include "base/humanize.hh"
42
#include "base/injector.bind.hh"
43
#include "base/separated_string.hh"
44
#include "base/string_attr_type.hh"
45
#include "config.h"
46
#include "formats/logfmt/logfmt.parser.hh"
47
#include "log_vtab_impl.hh"
48
#include "ptimec.hh"
49
#include "scn/scan.h"
50
#include "sql_util.hh"
51
#include "yajlpp/yajlpp.hh"
52

53
using std::string_literals::operator""s;
54

55
class piper_log_format : public log_format {
56
public:
57
    const intern_string_t get_name() const override
18,345✔
58
    {
59
        static const intern_string_t RETVAL
60
            = intern_string::lookup("lnav_piper_log");
20,201✔
61

62
        return RETVAL;
18,345✔
63
    }
64

65
    scan_result_t scan(logfile& lf,
14,796✔
66
                       std::vector<logline>& dst,
67
                       const line_info& li,
68
                       shared_buffer_ref& sbr,
69
                       scan_batch_context& sbc) override
70
    {
71
        if (lf.has_line_metadata()
14,796✔
72
            && lf.get_text_format() == text_format_t::TF_LOG)
14,796✔
73
        {
74
            auto& ll = dst.back();
293✔
75
            ll.set_time(li.li_timestamp);
293✔
76
            ll.set_level(li.li_level);
293✔
77
            return scan_match{1};
293✔
78
        }
79

80
        return scan_no_match{"not a piper capture"};
14,503✔
81
    }
82

83
    static constexpr int TIMESTAMP_SIZE = 28;
84

85
    void annotate(logfile* lf,
59✔
86
                  uint64_t line_number,
87
                  string_attrs_t& sa,
88
                  logline_value_vector& values) const override
89
    {
90
        auto lr = line_range{0, TIMESTAMP_SIZE};
59✔
91
        sa.emplace_back(lr, L_TIMESTAMP.value());
59✔
92
        log_format::annotate(lf, line_number, sa, values);
59✔
93
    }
59✔
94

95
    void get_subline(const log_format_file_state& lffs,
65✔
96
                     const logline& ll,
97
                     shared_buffer_ref& sbr,
98
                     subline_options opts) override
99
    {
100
        this->plf_cached_line.resize(TIMESTAMP_SIZE);
65✔
101
        auto tlen = sql_strftime(this->plf_cached_line.data(),
65✔
102
                                 this->plf_cached_line.size(),
103
                                 ll.get_timeval(),
65✔
104
                                 'T');
105
        this->plf_cached_line.resize(tlen);
65✔
106
        {
107
            char zone_str[16];
108
            exttm tmptm;
65✔
109

110
            tmptm.et_flags |= ETF_ZONE_SET;
65✔
111
            tmptm.et_gmtoff
112
                = lnav::local_time_to_info(
130✔
113
                      date::local_seconds{ll.get_time<std::chrono::seconds>()})
65✔
114
                      .first.offset.count();
65✔
115
            off_t zone_len = 0;
65✔
116
            ftime_z(zone_str, zone_len, sizeof(zone_str), tmptm);
65✔
117
            for (off_t lpc = 0; lpc < zone_len; lpc++) {
390✔
118
                this->plf_cached_line.push_back(zone_str[lpc]);
325✔
119
            }
120
        }
121
        this->plf_cached_line.push_back(' ');
65✔
122
        const auto prefix_len = this->plf_cached_line.size();
65✔
123
        this->plf_cached_line.resize(this->plf_cached_line.size()
130✔
124
                                     + sbr.length());
65✔
125
        memcpy(
65✔
126
            &this->plf_cached_line[prefix_len], sbr.get_data(), sbr.length());
65✔
127

128
        sbr.share(this->plf_share_manager,
130✔
129
                  this->plf_cached_line.data(),
65✔
130
                  this->plf_cached_line.size());
131
    }
65✔
132

133
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
134
    {
135
        auto retval = std::make_shared<piper_log_format>(*this);
6✔
136

137
        retval->lf_specialized = true;
6✔
138
        retval->lf_timestamp_flags |= ETF_ZONE_SET | ETF_MICROS_SET;
6✔
139
        return retval;
12✔
140
    }
6✔
141

142
private:
143
    shared_buffer plf_share_manager;
144
    std::vector<char> plf_cached_line;
145
};
146

147
class o1_generic_log_format : public log_format {
148
public:
149
    static const pcre_format* get_pcre_log_formats()
14,874✔
150
    {
151
        static const pcre_format log_fmt[] = {
152
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24}))"),
153
            pcre_format(
154
                R"((?x)^
155
  (?:\*\*\*\s+)?                              # optional "*** " prefix
156
  (?<timestamp>
157
      (?:
158
          \s
159
        | \d{4}[\-\/]\d{2}[\-\/]\d{2}         # YYYY-MM-DD or YYYY/MM/DD
160
        | T                                   # ISO date/time separator
161
        | \d{1,2}:\d{2}(?::\d{2}(?:[\.,]\d{1,9})?)?   # HH:MM[:SS[.frac]]
162
        | Z                                   # UTC zulu marker
163
        | [+\-]\d{2}:?\d{2}                   # timezone offset, +0500 or +05:00
164
        | (?!DBG|DEBUG\d?|ERR|INFO|WARN|NONE|CRITICAL|FATAL)    # ...not one of these levels
165
          [A-Z]{3,4}                          # 3-4 uppercase letters (e.g. month/tz abbrev)
166
      )+
167
  )
168
  [:|\s]?                                     # optional separator
169
  (trc|trace|critical|fatal|dbg\d?|debug\d?|info|warn(?:ing)?|err(?:or)?)   # log level
170
  [:|\s]                                      # separator
171
  \s*
172
)"),
173
            pcre_format(
174
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) \[(trace|debug\d?|info|warn(?:ing)?|error|critical|fatal)\]\s+)"),
175
            pcre_format(
176
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) -- (trace|debug\d?|info|warn(?:ing)?|error|critical|fatal) --\s+)"),
177

178
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+/\.-]+) \[\w\s+)"),
179
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+)\s+)"),
180
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+) -\s+)"),
181
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+) -\s+)"),
182
            pcre_format(
183
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\] \[(trace|debug\d?|info|warn(?:ing)?|error|critical|fatal)\]\s+)"),
184
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: "
185
                        "\\.,/-]+)\\[[^\\]]+\\]\\s+"),
186
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+)\s+)"),
187

188
            pcre_format(
189
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s*(\w+):?\s+)"),
190
            pcre_format(
191
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s+)"),
192
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: "
193
                        "\\.,+/-]+)\\] \\w+\\s+"),
194
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: ,+/-]+)\\] "
195
                        "\\(\\d+\\)\\s+"),
196

197
            pcre_format(),
198
        };
14,874✔
199

200
        return log_fmt;
14,874✔
201
    }
202

203
    std::string get_pattern_regex(const pattern_locks& pl,
×
204
                                  uint64_t line_number) const override
205
    {
206
        auto pat_index = pl.pattern_index_for_line(line_number);
×
207
        return get_pcre_log_formats()[pat_index].name;
×
208
    }
209

210
    const intern_string_t get_name() const override
17,898✔
211
    {
212
        static const intern_string_t RETVAL
213
            = intern_string::lookup("generic_log");
19,754✔
214

215
        return RETVAL;
17,898✔
216
    }
217

218
    scan_result_t scan(logfile& lf,
14,759✔
219
                       std::vector<logline>& dst,
220
                       const line_info& li,
221
                       shared_buffer_ref& sbr,
222
                       scan_batch_context& sbc) override
223
    {
224
        exttm log_time;
14,759✔
225
        timeval log_tv;
226
        string_fragment ts;
14,759✔
227
        std::optional<string_fragment> level;
14,759✔
228
        const char* last_pos;
229

230
        if (dst.size() == 1) {
14,759✔
231
            auto file_options = lf.get_file_options();
1,427✔
232

233
            if (file_options) {
1,427✔
234
                this->lf_date_time.dts_default_zone
235
                    = file_options->second.fo_default_zone.pp_value;
60✔
236
            } else {
237
                this->lf_date_time.dts_default_zone = nullptr;
1,367✔
238
            }
239
        }
1,427✔
240

241
        if ((last_pos = this->log_scanf(sbc,
29,518✔
242
                                        dst.size(),
14,759✔
243
                                        sbr.to_string_fragment(),
244
                                        get_pcre_log_formats(),
245
                                        nullptr,
246
                                        &log_time,
247
                                        &log_tv,
248

249
                                        &ts,
250
                                        &level))
251
            != nullptr)
14,759✔
252
        {
253
            auto level_val = log_level_t::LEVEL_UNKNOWN;
3,337✔
254
            if (level) {
3,337✔
255
                level_val = string2level(level->data(), level->length());
3,337✔
256
            }
257

258
            if (!((log_time.et_flags & ETF_DAY_SET)
3,337✔
259
                  && (log_time.et_flags & ETF_MONTH_SET)
3,262✔
260
                  && (log_time.et_flags & ETF_YEAR_SET)))
3,262✔
261
            {
262
                this->check_for_new_year(dst, log_time, log_tv);
994✔
263
            }
264

265
            if (!(this->lf_timestamp_flags
6,674✔
266
                  & (ETF_MILLIS_SET | ETF_MICROS_SET | ETF_NANOS_SET))
3,337✔
267
                && !dst.empty()
2,916✔
268
                && dst.back().get_time<std::chrono::seconds>().count()
2,916✔
269
                    == log_tv.tv_sec
2,916✔
270
                && dst.back()
7,953✔
271
                        .get_subsecond_time<std::chrono::microseconds>()
5,037✔
272
                        .count()
1,700✔
273
                    != 0)
274
            {
275
                auto log_us
276
                    = dst.back()
×
277
                          .get_subsecond_time<std::chrono::microseconds>();
×
278

279
                log_time.et_nsec
280
                    = std::chrono::duration_cast<std::chrono::nanoseconds>(
×
281
                          log_us)
282
                          .count();
×
283
                log_tv.tv_usec
284
                    = std::chrono::duration_cast<std::chrono::microseconds>(
×
285
                          log_us)
286
                          .count();
×
287
            }
288

289
            auto log_us = to_us(log_tv);
3,337✔
290
            auto tid_iter = sbc.sbc_tids.insert_tid(
3,337✔
291
                sbc.sbc_allocator, string_fragment{}, log_us);
×
292
            tid_iter->second.titr_level_stats.update_msg_count(level_val);
3,337✔
293
            auto& ll = dst.back();
3,337✔
294
            ll.set_time(log_us);
3,337✔
295
            ll.set_level(level_val);
3,337✔
296
            return scan_match{5};
3,337✔
297
        }
298

299
        return scan_no_match{"no patterns matched"};
11,422✔
300
    }
301

302
    void annotate(logfile* lf,
115✔
303
                  uint64_t line_number,
304
                  string_attrs_t& sa,
305
                  logline_value_vector& values) const override
306
    {
307
        thread_local auto md = lnav::pcre2pp::match_data::unitialized();
115✔
308
        auto lffs = lf->get_format_file_state();
115✔
309
        auto& line = values.lvv_sbr;
115✔
310
        int pat_index
311
            = lffs.lffs_pattern_locks.pattern_index_for_line(line_number);
115✔
312
        const auto& fmt = get_pcre_log_formats()[pat_index];
115✔
313
        const auto line_sf = line.to_string_fragment();
115✔
314
        auto match_res = fmt.pcre->capture_from(line_sf)
115✔
315
                             .into(md)
115✔
316
                             .matches(PCRE2_NO_UTF_CHECK)
230✔
317
                             .ignore_error();
115✔
318
        if (!match_res) {
115✔
319
            return;
12✔
320
        }
321

322
        int prefix_len = md.remaining().sf_begin;
103✔
323
        auto ts_cap = md[fmt.pf_timestamp_index].value();
103✔
324
        auto lr = to_line_range(ts_cap.trim());
103✔
325
        auto level_cap = md[2];
103✔
326

327
        if (!level_cap) {
103✔
328
            lr.lr_end = prefix_len
20✔
329
                = lr.lr_start + this->lf_date_time.dts_fmt_len;
20✔
330
        }
331
        sa.emplace_back(lr, L_TIMESTAMP.value());
103✔
332

333
        values.lvv_values.emplace_back(TS_META, line, lr);
103✔
334
        values.lvv_values.back().lv_meta.lvm_format = (log_format*) this;
103✔
335

336
        if (level_cap) {
103✔
337
            if (string2level(level_cap->data(), level_cap->length(), true)
83✔
338
                != LEVEL_UNKNOWN)
83✔
339
            {
340
                values.lvv_values.emplace_back(
83✔
341
                    LEVEL_META, line, to_line_range(level_cap->trim()));
83✔
342
                values.lvv_values.back().lv_meta.lvm_format
83✔
343
                    = (log_format*) this;
83✔
344

345
                lr = to_line_range(level_cap->trim());
83✔
346
                if (lr.lr_end != (ssize_t) line.length()) {
83✔
347
                    sa.emplace_back(lr, L_LEVEL.value());
83✔
348
                }
349
            }
350
        }
351

352
        lr.lr_start = 0;
103✔
353
        lr.lr_end = prefix_len;
103✔
354
        sa.emplace_back(lr, L_PREFIX.value());
103✔
355

356
        lr.lr_start = prefix_len;
103✔
357
        lr.lr_end = line.length();
103✔
358
        sa.emplace_back(lr, SA_BODY.value());
103✔
359

360
        log_format::annotate(lf, line_number, sa, values);
103✔
361
    }
362

363
    std::shared_ptr<log_format> specialized(int fmt_lock) override
56✔
364
    {
365
        auto retval = std::make_shared<o1_generic_log_format>(*this);
56✔
366

367
        retval->lf_specialized = true;
56✔
368
        return retval;
112✔
369
    }
56✔
370

371
    bool hide_field(const intern_string_t field_name, bool val) override
35✔
372
    {
373
        if (field_name == TS_META.lvm_name) {
35✔
374
            TS_META.lvm_user_hidden = val;
12✔
375
            return true;
12✔
376
        }
377
        if (field_name == LEVEL_META.lvm_name) {
23✔
378
            LEVEL_META.lvm_user_hidden = val;
12✔
379
            return true;
12✔
380
        }
381
        if (field_name == OPID_META.lvm_name) {
11✔
382
            OPID_META.lvm_user_hidden = val;
11✔
383
            return true;
11✔
384
        }
385
        return false;
×
386
    }
387

388
    std::map<intern_string_t, logline_value_meta> get_field_states() override
97✔
389
    {
390
        return {
391
            {TS_META.lvm_name, TS_META},
392
            {LEVEL_META.lvm_name, LEVEL_META},
393
            {OPID_META.lvm_name, OPID_META},
394
        };
485✔
395
    }
97✔
396

397
private:
398
    static logline_value_meta TS_META;
399
    static logline_value_meta LEVEL_META;
400
    static logline_value_meta OPID_META;
401
};
402

403
// Column metadata for the generic_log format.  Each entry is a text value;
// the timestamp and level are table columns 2 and 3, and the opid is an
// internal column.

logline_value_meta o1_generic_log_format::TS_META = logline_value_meta{
    intern_string::lookup("log_time"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{2},
};

logline_value_meta o1_generic_log_format::LEVEL_META = logline_value_meta{
    intern_string::lookup("log_level"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{3},
};

logline_value_meta o1_generic_log_format::OPID_META = logline_value_meta{
    intern_string::lookup("log_opid"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::internal_column{},
};
420

421
/**
 * Decode `\xHH` escapes (a backslash, a lower-case 'x' and exactly two
 * hexadecimal digits) in a length-delimited buffer.
 *
 * Only the first `len` bytes of `str` are ever read; the buffer does not
 * need to be NUL-terminated.  Bytes that are not a backslash are copied
 * through unchanged.  A backslash that does not start a complete, valid
 * escape is dropped and the bytes after it are then handled as ordinary
 * input.
 *
 * @param str The bytes to decode.
 * @param len The number of bytes available at `str`.
 * @return The decoded string.
 */
std::string
from_escaped_string(const char* str, size_t len)
{
    // Value of a single hex digit, or -1 when `ch` is not one.  Done by
    // hand instead of with sscanf(), which expects NUL-terminated input,
    // skips leading whitespace and accepts signs/partial digits.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    std::string retval;

    retval.reserve(len);
    for (size_t lpc = 0; lpc < len; lpc++) {
        if (str[lpc] != '\\') {
            retval.push_back(str[lpc]);
            continue;
        }

        // A full escape needs three more bytes: 'x' and two hex digits.
        if ((lpc + 3) < len && str[lpc + 1] == 'x') {
            const int hi = hex_value(str[lpc + 2]);
            const int lo = hex_value(str[lpc + 3]);

            if (hi >= 0 && lo >= 0) {
                retval.push_back(static_cast<char>((hi << 4) | lo));
                // Skip the 'x' and both digits; the loop skips the '\'.
                lpc += 3;
            }
        }
    }

    return retval;
}
×
446

447
// -----------------------------------------------------------------
448
// Recognizes CSV files whose first line is a header with a
449
// timestamp-like first column (`timestamp`, `time`, `ts`, or a name
450
// starting with `date`), and whose subsequent rows begin with a
451
// parseable timestamp.  Tolerates a leading UTF-8 BOM, the
452
// Excel-style `sep=<ch>` delimiter hint, CRLF line endings, and
453
// CSV-style `""`-escaped double quotes inside quoted fields.
454
// The header line is emitted as an ignored logline so lnav stays
455
// locked to this format for the rest of the file.
456
//
457
// Each non-timestamp column is exposed as a `VALUE_FLOAT` field so
458
// queries such as `SELECT cpu_pct FROM metrics_log` work per-file.
459
// The cross-file long-format `all_metrics` SQL virtual table
460
// (source/metric/value across all loaded metric files) lives in
461
// `metrics_vtab.cc`.
462
// -----------------------------------------------------------------
463
class metrics_log_format : public log_format {
464
public:
465
    metrics_log_format()
934✔
466
    {
934✔
467
        this->lf_multiline = false;
934✔
468
        this->lf_is_metric = true;
934✔
469
        this->lf_time_ordered = false;
934✔
470
    }
934✔
471

472
    /** @return The interned name of this format, "metrics_log". */
    const intern_string_t get_name() const override
    {
        static const intern_string_t NAME
            = intern_string::lookup("metrics_log");

        return NAME;
    }
479

480
    /**
     * Parse one data row: the first cell is the timestamp and every
     * following cell is folded into the per-column statistics in `sbc`.
     *
     * On a successfully parsed timestamp the newest entry in `dst` gets
     * that time and the flags reported by the scanner are OR-ed into
     * `lf_timestamp_flags`.
     *
     * @param line_sf The raw row text.
     * @param dst The loglines indexed so far; `dst.back()` is updated.
     * @param sbc The batch context whose `sbc_value_stats` are updated.
     * @return `scan_match{500}` on success, otherwise a `scan_error` when
     * the row is empty, the timestamp does not parse, the cell count does
     * not equal the number of field definitions, or (only before the
     * format is specialized) no numeric value has been counted.
     */
    scan_result_t parse_line(const string_fragment& line_sf,
                             std::vector<logline>& dst,
                             scan_batch_context& sbc)
    {
        separated_string ss{line_sf};
        ss.with_separator(this->mlf_separator);
        if (!this->mlf_headers.empty()) {
            ss.ss_expected_count = this->mlf_headers.size();
        }
        auto iter = ss.begin();
        if (iter == ss.end()) {
            return scan_error{"empty metric row"};
        }
        const auto ts_sf = *iter;

        auto& dts = this->lf_date_time;
        exttm tm;
        timeval tv;
        if (dts.scan(ts_sf.data(), ts_sf.length(), nullptr, &tm, tv) == nullptr)
        {
            return scan_error{fmt::format(
                FMT_STRING("metric row timestamp did not parse: {}"),
                ts_sf.to_string())};
        }
        dst.back().set_time(to_us(tv));
        // Propagate what the scanner learned (zone offset, subsecond
        // precision) so downstream consumers can reproduce the
        // timestamp in the right form.
        this->lf_timestamp_flags |= tm.et_flags;

        // One stats slot per field definition; the slot index is the
        // zero-based position of the cell after the timestamp.  The
        // index is a `size_t` so it can be compared with container
        // sizes without mixing signed and unsigned values.
        const size_t field_count = this->mlf_field_defs.size();
        sbc.sbc_value_stats.resize(field_count);
        ++iter;
        size_t field_index = 0;
        for (; iter != ss.end(); ++iter, ++field_index) {
            if (field_index >= field_count) {
                return scan_error{
                    fmt::format(FMT_STRING("metric row has too many fields, "
                                           "expecting only {} fields"),
                                field_count)};
            }
            auto& stats = sbc.sbc_value_stats[field_index];
            // Track the widest raw cell so the LOG-view renderer can
            // column-align values across rows.
            const auto cell_len = static_cast<int64_t>((*iter).length());
            if (cell_len > stats.lvs_width) {
                stats.lvs_width = cell_len;
            }
            // Cells of kind `other` are also recorded as text in the
            // column's stats, using the raw (still escaped) cell bytes.
            if (iter.kind() == separated_string::cell_kind::other) {
                stats.add_text(*iter);
            }
            // Numeric results of any flavor feed the numeric stats;
            // empty and text cells contribute nothing here.
            parse_cell(iter, parse_context::scan)
                .match(
                    [](empty_cell) {},
                    [&stats](int64_t i) {
                        stats.add_value(static_cast<double>(i));
                    },
                    [&stats](double d) { stats.add_value(d); },
                    [&stats](humanized_cell hc) { stats.add_value(hc.value); },
                    [](const text_cell&) {});
        }
        if (field_index < field_count) {
            return scan_error{fmt::format(
                FMT_STRING("metric row has too few fields: found {}, "
                           "expected {} fields"),
                field_index,
                field_count)};
        }
        if (!this->lf_specialized) {
            // While still detecting the format, require that the stats
            // have counted at least one value across all columns.
            auto number_cells = 0;
            for (const auto& stats : sbc.sbc_value_stats) {
                number_cells += stats.lvs_count;
            }
            if (number_cells == 0) {
                return scan_error{"metric row has no numeric fields"};
            }
        }

        return scan_match{500};
    }
568

569
    /**
     * Handle a data row: mark the newest logline as LEVEL_STATS and parse
     * the row held in `sbr`.
     *
     * `dst` must not be empty: `dst.back()` is read unconditionally here
     * and again in parse_line().  The case where a rescan starts over with
     * only the header line in `dst` is dealt with by scan() before it
     * delegates to this function.
     *
     * @param dst The loglines indexed so far; `dst.back()` is updated.
     * @param li Unused.
     * @param sbr The raw text of the row.
     * @param sbc The batch context passed through to parse_line().
     * @return The result of parse_line().
     */
    scan_result_t scan_int(std::vector<logline>& dst,
                           const line_info& li,
                           shared_buffer_ref& sbr,
                           scan_batch_context& sbc)
    {
        dst.back().set_level(LEVEL_STATS);

        return this->parse_line(sbr.to_string_fragment(), dst, sbc);
    }
586

587
    scan_result_t scan(logfile& lf,
14,796✔
588
                       std::vector<logline>& dst,
589
                       const line_info& li,
590
                       shared_buffer_ref& sbr,
591
                       scan_batch_context& sbc) override
592
    {
593
        if (li.li_partial) {
14,796✔
594
            return scan_incomplete{};
26✔
595
        }
596

597
        // Keep the scanner's default zone in sync with the file's
598
        // current options on every scan.  `:set-file-timezone`
599
        // mutates the options after the format has already specialized,
600
        // so a once-at-detection sync leaves stale state and every
601
        // subsequent timestamp parses against the wrong zone.
602
        {
603
            auto file_options = lf.get_file_options();
14,770✔
604
            this->lf_date_time.dts_default_zone = file_options
14,770✔
605
                ? file_options->second.fo_default_zone.pp_value
14,770✔
606
                : nullptr;
607
        }
14,770✔
608

609
        if (this->lf_specialized) {
14,770✔
610
            if (dst.size() == 1) {
350✔
611
                // Reindex (e.g. after `:set-file-timezone`) clears
612
                // `lf_index` and starts scanning from byte zero again.
613
                // The format is still locked in from the prior pass,
614
                // so just reproduce the header's ignored-logline so
615
                // the data rows that follow land in `scan_int` with
616
                // a valid `dst.back()`.
617
                auto& ll = dst.back();
1✔
618
                ll.set_level(LEVEL_UNKNOWN);
1✔
619
                ll.set_ignore(true);
1✔
620
                return scan_match{500};
1✔
621
            }
622
            // we've locked on, don't need to figure out the header
623
            return scan_int(dst, li, sbr, sbc);
349✔
624
        }
625

626
        if (dst.size() < 2) {
14,420✔
627
            return scan_no_match{"waiting for header and data row"};
1,460✔
628
        }
629

630
        if (dst.size() > 3) {
12,960✔
631
            return scan_no_match{
11,570✔
632
                "line is after CSV headers and first data row"};
11,570✔
633
        }
634

635
        // First part of the file — reset any per-file state left
636
        // over from a prior file on this shared base instance.
637
        this->mlf_headers.clear();
1,390✔
638
        this->mlf_field_defs.clear();
1,390✔
639
        this->mlf_separator = ',';
1,390✔
640
        auto has_sep_directive = false;
1,390✔
641
        for (auto ll_iter = dst.begin(); ll_iter != dst.end(); ++ll_iter) {
1,527✔
642
            if (ll_iter->get_sub_offset() != 0) {
1,461✔
NEW
643
                continue;
×
644
            }
645
            auto read_res = lf.read_raw_message(ll_iter);
1,461✔
646
            if (read_res.isErr()) {
1,461✔
647
                return scan_no_match{"cannot read header"};
×
648
            }
649

650
            auto hdr_sbr = read_res.unwrap();
1,461✔
651
            auto hdr_sf = hdr_sbr.to_string_fragment();
1,461✔
652
            // Excel-flavor CSVs sometimes start with `sep=<ch>` to
653
            // hint the delimiter.  Consume that as metadata and wait
654
            // for the real header on the next line.
655
            if (ll_iter == dst.begin() && hdr_sf.startswith("sep=")) {
1,461✔
656
                if (dst.size() == 1) {
2✔
657
                    return scan_no_match{"waiting for more data"};
×
658
                }
659

660
                const auto sep_sf = hdr_sf.substr(4);
2✔
661
                if (sep_sf.empty()) {
2✔
662
                    return scan_error{"sep= hint missing separator character"};
×
663
                }
664
                this->mlf_separator = sep_sf.data()[0];
2✔
665
                ll_iter->set_time(std::chrono::microseconds::zero());
2✔
666
                ll_iter->set_level(LEVEL_UNKNOWN);
2✔
667
                ll_iter->set_ignore(true);
2✔
668
                has_sep_directive = true;
2✔
669
                log_info("metrics_log found 'sep=' header: %x",
2✔
670
                         this->mlf_separator);
671
            } else if (this->mlf_headers.empty()) {
1,459✔
672
                // Header row: require a shape like
673
                // `timestamp,<name>,<name>...`.  This is a conservative
674
                // detector — files without a leading timestamp-named
675
                // column are left to other formats.
676
                separated_string ss{hdr_sf};
1,390✔
677
                if (!has_sep_directive) {
1,390✔
678
                    auto detect_res
679
                        = separated_string::detect_separator(hdr_sf);
1,388✔
680
                    if (detect_res) {
1,388✔
681
                        this->mlf_separator = detect_res.value();
319✔
682
                        log_info("metrics_log detected separator: %x",
319✔
683
                                 this->mlf_separator);
684
                    }
685
                }
686
                ss.with_separator(this->mlf_separator);
1,390✔
687
                std::vector<intern_string_t> fields;
1,390✔
688
                for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
4,558✔
689
                    // Header cells may be CSV-quoted (e.g. Grafana
690
                    // exports wrap PromQL expressions that contain
691
                    // commas or doubled quotes).  Collapse `""` back
692
                    // to `"` so the interned column name matches what
693
                    // the user wrote.
694
                    fields.emplace_back(intern_string::lookup(
3,168✔
695
                        separated_string::unescape_quoted(*iter)));
6,336✔
696
                    log_info("  metrics header: %s", fields.back().c_str());
3,168✔
697
                }
698
                if (fields.size() < 2) {
1,390✔
699
                    return scan_no_match{"too few columns for a metric CSV"};
870✔
700
                }
701
                const auto first = fields[0].to_string_fragment();
520✔
702
                const bool is_time_header = first.iequal("timestamp"_frag)
520✔
703
                    || first.iequal("time"_frag) || first.iequal("ts"_frag)
458✔
704
                    || (first.length() >= 4
1,323✔
705
                        && strncasecmp(first.data(), "date", 4) == 0);
345✔
706
                if (!is_time_header) {
520✔
707
                    return scan_error{fmt::format(
451✔
708
                        FMT_STRING(
1,353✔
709
                            "first column '{}' is not a timestamp header "
710
                            "(expected 'timestamp', 'time', 'ts', or a "
711
                            "'date'-prefixed name)"),
712
                        first.to_string())};
1,353✔
713
                }
714

715
                this->mlf_headers = std::move(fields);
69✔
716
                log_info("metrics_log found %zu header columns",
69✔
717
                         this->mlf_headers.size());
718
                this->build_field_defs();
69✔
719
                ll_iter->set_time(std::chrono::microseconds::zero());
69✔
720
                ll_iter->set_level(LEVEL_UNKNOWN);
69✔
721
                ll_iter->set_ignore(true);
69✔
722
            } else {
1,390✔
723
                auto scan_res = this->parse_line(hdr_sf, dst, sbc);
69✔
724
                if (!scan_res.is<scan_match>()) {
69✔
725
                    log_warning("first data row did not match");
3✔
726
                    return scan_res;
3✔
727
                }
728
                ll_iter->set_level(LEVEL_STATS);
66✔
729
            }
69✔
730
        }
2,785✔
731
        return this->scan_int(dst, li, sbr, sbc);
66✔
732
    }
733

734
    std::optional<size_t> stats_index_for_value(
1,163✔
735
        const intern_string_t& name) const override
736
    {
737
        for (size_t i = 0; i < this->mlf_field_defs.size(); ++i) {
2,239✔
738
            if (this->mlf_field_defs[i].lvm_name == name) {
2,188✔
739
                return i;
1,112✔
740
            }
741
        }
742
        return std::nullopt;
51✔
743
    }
744

745
    std::vector<logline_value_meta> get_value_metadata() const override
86✔
746
    {
747
        return this->mlf_field_defs;
86✔
748
    }
749

750
    size_t get_value_metadata_count() const override
431✔
751
    {
752
        return this->mlf_field_defs.size();
431✔
753
    }
754

755
    void annotate(logfile* lf,
976✔
756
                  uint64_t line_number,
757
                  string_attrs_t& sa,
758
                  logline_value_vector& values) const override
759
    {
760
        auto& sbr = values.lvv_sbr;
976✔
761
        const auto line_sf = sbr.to_string_fragment().trim("\r\n");
976✔
762

763
        separated_string ss{line_sf};
976✔
764
        ss.with_separator(this->mlf_separator);
976✔
765
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
7,678✔
766
            const auto field = *iter;
3,355✔
767
            const auto lr = line_range{field.sf_begin, field.sf_end};
3,355✔
768

769
            if (iter.index() == 0) {
3,355✔
770
                sa.emplace_back(lr, L_TIMESTAMP.value());
976✔
771
                continue;
976✔
772
            }
773
            // The header row is emitted as an ignored logline, so
774
            // `mlf_field_defs` (which excludes col 0) has one entry
775
            // per data column.  Extra trailing columns are dropped.
776
            const auto field_index = iter.index() - 1;
2,379✔
777
            if (field_index >= this->mlf_field_defs.size()) {
2,379✔
778
                break;
4✔
779
            }
780
            // Parse once rather than paying the re-parse cost each
781
            // time SQL reads the cell.  The variant preserves int vs
782
            // float so the renderer can format integers without a
783
            // trailing decimal point.  The static `mlf_hidden_columns`
784
            // registry is overlaid so hide state propagates across
785
            // specialized instances that share column names.
786
            auto meta = this->mlf_field_defs[field_index];
2,375✔
787
            if (mlf_hidden_columns.count(meta.lvm_name) != 0) {
2,375✔
788
                meta.lvm_user_hidden = true;
99✔
789
            }
790
            parse_cell(iter, parse_context::annotate)
2,375✔
791
                .match(
2,375✔
792
                    [&](empty_cell) { values.lvv_values.emplace_back(meta); },
×
793
                    [&](int64_t i) { values.lvv_values.emplace_back(meta, i); },
1,574✔
794
                    [&](double d) { values.lvv_values.emplace_back(meta, d); },
615✔
795
                    [&](humanized_cell hc) {
×
796
                        // Carry the detected unit on the per-value meta so
797
                        // downstream renderers can call humanize::format
798
                        // against the base-unit value.
799
                        auto cell_meta = meta;
124✔
800
                        cell_meta.lvm_unit_suffix = hc.unit_suffix;
124✔
801
                        values.lvv_values.emplace_back(cell_meta, hc.value);
124✔
802
                    },
124✔
803
                    [&](const text_cell& tc) {
2,375✔
804
                        values.lvv_values.emplace_back(meta, tc.value);
62✔
805
                        values.lvv_values.back().lv_meta.lvm_kind
62✔
806
                            = value_kind_t::VALUE_TEXT;
62✔
807
                    });
62✔
808
            values.lvv_values.back().lv_origin = lr;
2,375✔
809
        }
2,375✔
810

811
        log_format::annotate(lf, line_number, sa, values);
976✔
812
    }
976✔
813

814
    std::shared_ptr<log_format> specialized(int fmt_lock) override
63✔
815
    {
816
        auto retval = std::make_shared<metrics_log_format>(*this);
63✔
817

818
        retval->lf_specialized = true;
63✔
819
        return retval;
126✔
820
    }
63✔
821

822
private:
823
    // A parsed metric cell is one alternative of `parsed_cell_t`:
824
    // empty, int64, double, humanized number, or text.  Keeping the
825
    // original integer type lets the renderer format int cells
826
    // without a decimal point, while callers that want a single
827
    // numeric type can coerce via `match`.
828
    // `empty_cell` is what `parse_cell()` returns for cells that are
    // empty or could not be parsed.
    struct empty_cell {};
829
    // Humanized cell: the raw text had a recognized unit suffix
830
    // ("1.5KB", "20ms", "2.5GHz").  The value is already normalized
831
    // to the base unit (bytes, seconds, Hz) and `unit_suffix` carries
832
    // the canonical suffix so downstream renderers can format it back
833
    // to human-friendly form.
834
    struct humanized_cell {
835
        double value;
836
        intern_string_t unit_suffix;
837
    };
838
    // Text cell: a cell classified as `cell_kind::other`.  `value`
    // holds the text after `separated_string::unescape_quoted()`.
    // `parse_cell()` only produces it in the `annotate` context.
    struct text_cell {
839
        std::string value;
840
    };
841
    // Result type of `parse_cell()`.
    using parsed_cell_t = mapbox::util::
842
        variant<empty_cell, int64_t, double, humanized_cell, text_cell>;
843

844
    // Tells `parse_cell()` who is asking: in the `scan` context plain
    // text cells are reported as `empty_cell`, in the `annotate`
    // context they are returned as `text_cell`.
    enum class parse_context {
845
        scan,
846
        annotate,
847
    };
848

849
    // Convert the cell under `iter` into a `parsed_cell_t`.
    //
    // The cell's classification (`iter.kind()`) picks the parser:
    //   - integer            -> int64_t
    //   - floating           -> double
    //   - number_with_suffix -> humanized_cell (value plus unit suffix)
    //   - other              -> text_cell when `pc` is `annotate`
    // Everything else yields `empty_cell`: empty cells, a parser that
    // rejects the text, or plain text seen in the `scan` context.
    static parsed_cell_t parse_cell(const separated_string::iterator& iter,
                                    parse_context pc)
    {
        using kind_t = separated_string::cell_kind;

        const auto raw = *iter;
        const auto kind = iter.kind();

        if (kind == kind_t::integer) {
            auto parsed = scn::scan_value<int64_t>(raw.to_string_view());
            if (parsed) {
                return parsed_cell_t{parsed->value()};
            }
        } else if (kind == kind_t::floating) {
            auto parsed = scn::scan_value<double>(raw.to_string_view());
            if (parsed) {
                return parsed_cell_t{parsed->value()};
            }
        } else if (kind == kind_t::number_with_suffix) {
            // The classifier has already seen a `<num><unit>` shape.
            auto parsed = humanize::try_from<double>(raw);
            if (parsed) {
                return parsed_cell_t{humanized_cell{
                    parsed->value,
                    intern_string::lookup(parsed->unit_suffix),
                }};
            }
        } else if (kind == kind_t::other && pc == parse_context::annotate) {
            // Only annotation keeps plain text so it can be shown and
            // queried; scanning falls through to the empty result so
            // text does not affect stats or the column type.
            return parsed_cell_t{
                text_cell{separated_string::unescape_quoted(raw)}};
        }

        return parsed_cell_t{empty_cell{}};
    }
900

901
    void build_field_defs()
69✔
902
    {
903
        this->mlf_field_defs.clear();
69✔
904
        // Columns 1..N (timestamp is column 0) become VALUE_FLOAT
905
        // fields.  Column names are kept verbatim from the header;
906
        // the CREATE TABLE generator applies SQL quoting for names
907
        // that need it.  Pass `this` as the owning format so the
908
        // field_overlay_source treats these as real table fields
909
        // (show/hide, chart, etc.) rather than skipping them.
910
        for (size_t h = 1; h < this->mlf_headers.size(); ++h) {
243✔
911
            this->mlf_field_defs.emplace_back(
174✔
912
                this->mlf_headers[h],
174✔
913
                value_kind_t::VALUE_FLOAT,
×
914
                logline_value_meta::table_column{h - 1},
×
915
                this);
174✔
916
            if (mlf_hidden_columns.count(this->mlf_headers[h]) != 0) {
174✔
917
                this->mlf_field_defs.back().lvm_user_hidden = true;
×
918
            }
919
        }
920
    }
69✔
921

922
public:
923
    // Hide state lives in a static set instead of on the meta so it
924
    // survives file re-detection (which rebuilds `mlf_field_defs` from
925
    // scratch) and propagates across every specialized instance that
926
    // shares the column name.  Only the currently-hidden columns are
927
    // tracked — showing a column erases its entry rather than storing
928
    // `false`, so the set stays bounded across hide/show cycles.
929
    bool hide_field(const intern_string_t field_name, bool val) override
18✔
930
    {
931
        if (val) {
18✔
932
            mlf_hidden_columns.insert(field_name);
7✔
933
        } else {
934
            mlf_hidden_columns.erase(field_name);
11✔
935
        }
936
        for (auto& meta : this->mlf_field_defs) {
66✔
937
            if (meta.lvm_name == field_name) {
48✔
938
                if (val) {
17✔
939
                    meta.lvm_user_hidden = true;
6✔
940
                } else {
941
                    meta.lvm_user_hidden.reset();
11✔
942
                }
943
            }
944
        }
945
        return true;
18✔
946
    }
947

948
    std::map<intern_string_t, logline_value_meta> get_field_states() override
191✔
949
    {
950
        std::map<intern_string_t, logline_value_meta> retval;
191✔
951
        for (const auto& meta : this->mlf_field_defs) {
416✔
952
            retval.emplace(meta.lvm_name, meta);
225✔
953
        }
954
        // Include columns that were hidden before this instance saw
955
        // its header, so session save still captures them.
956
        for (const auto& name : mlf_hidden_columns) {
194✔
957
            if (retval.count(name) != 0) {
3✔
958
                continue;
3✔
959
            }
960
            logline_value_meta meta{name, value_kind_t::VALUE_FLOAT};
×
961
            meta.lvm_user_hidden = true;
×
962
            retval.emplace(name, std::move(meta));
×
963
        }
964
        return retval;
191✔
965
    }
×
966

967
    std::vector<intern_string_t> mlf_headers;
968
    std::vector<logline_value_meta> mlf_field_defs;
969
    // Column separator; overridden by an Excel-style `sep=<ch>` hint
970
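    // on the first line of the file, or set from the result of
    // `separated_string::detect_separator()` on the header row when
    // no such hint was seen.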
971
    char mlf_separator{','};
972

973
    // User-hidden metric column names.  Shared across every
974
    // `metrics_log_format` instance so hides set via
975
    // `:hide-fields metrics_log.<col>` affect every open metric file
976
    // that has the column, and survive file re-detection (which
977
    // rebuilds `mlf_field_defs`).  Only currently-hidden columns are
978
    // stored; `hide_field(name, false)` erases so the set stays
979
    // bounded across hide/show cycles.
980
    static std::set<intern_string_t> mlf_hidden_columns;
981
};
982

983
std::set<intern_string_t> metrics_log_format::mlf_hidden_columns;
984

985
class bro_log_format : public log_format {
986
public:
987
    static const intern_string_t TS;
988
    static const intern_string_t DURATION;
989
    struct field_def {
990
        logline_value_meta fd_meta;
991
        logline_value_meta* fd_root_meta;
992
        std::string fd_collator;
993
        std::optional<size_t> fd_numeric_index;
994

995
        explicit field_def(const intern_string_t name,
709✔
996
                           size_t col,
997
                           log_format* format)
998
            : fd_meta(name,
1,418✔
999
                      value_kind_t::VALUE_TEXT,
1000
                      logline_value_meta::table_column{col},
709✔
1001
                      format),
1002
              fd_root_meta(&FIELD_META.find(name)->second)
709✔
1003
        {
1004
        }
709✔
1005

1006
        field_def& with_kind(value_kind_t kind,
521✔
1007
                             bool identifier = false,
1008
                             bool foreign_key = false,
1009
                             const std::string& collator = "")
1010
        {
1011
            this->fd_meta.lvm_kind = kind;
521✔
1012
            this->fd_meta.lvm_identifier = identifier;
521✔
1013
            this->fd_meta.lvm_foreign_key = foreign_key;
521✔
1014
            this->fd_collator = collator;
521✔
1015
            return *this;
521✔
1016
        }
1017

1018
        field_def& with_numeric_index(size_t index)
131✔
1019
        {
1020
            this->fd_numeric_index = index;
131✔
1021
            return *this;
131✔
1022
        }
1023
    };
1024

1025
    static std::unordered_map<const intern_string_t, logline_value_meta>
1026
        FIELD_META;
1027

1028
    // Interned name ("std") under which the constructor registers this
    // format's opid description.  Looked up once and then reused.
    static const intern_string_t get_opid_desc()
    {
        static const intern_string_t STD_DESC(intern_string::lookup("std"));

        return STD_DESC;
    }
1034

1035
    bro_log_format()
934✔
1036
    {
934✔
1037
        this->lf_multiline = false;
934✔
1038
        this->lf_structured = true;
934✔
1039
        this->lf_is_self_describing = true;
934✔
1040
        this->lf_time_ordered = false;
934✔
1041
        this->lf_timestamp_point_of_reference
1042
            = timestamp_point_of_reference_t::start;
934✔
1043

1044
        auto desc_v = std::make_shared<std::vector<opid_descriptor>>();
934✔
1045
        desc_v->emplace({});
934✔
1046
        auto emplace_res = this->lf_opid_description_def->emplace(
1,868✔
1047
            get_opid_desc(), opid_descriptors{{}, desc_v, 0});
1,868✔
1048
        this->lf_opid_description_def_vec->emplace_back(
934✔
1049
            &emplace_res.first->second);
934✔
1050
    }
934✔
1051

1052
    // Name of this format: `blf_format_name` once a header has set
    // it, otherwise the generic "bro".
    const intern_string_t get_name() const override
    {
        static const intern_string_t DEFAULT_NAME(
            intern_string::lookup("bro"));

        if (this->blf_format_name.empty()) {
            return DEFAULT_NAME;
        }
        return this->blf_format_name;
    }
1058

1059
    void clear() override
14,821✔
1060
    {
1061
        this->log_format::clear();
14,821✔
1062
        this->blf_format_name.clear();
14,821✔
1063
        this->blf_field_defs.clear();
14,821✔
1064
    }
14,821✔
1065

1066
    std::vector<logline_value_meta> get_value_metadata() const override
1✔
1067
    {
1068
        std::vector<logline_value_meta> retval;
1✔
1069

1070
        for (const auto& fd : this->blf_field_defs) {
30✔
1071
            retval.emplace_back(fd.fd_meta);
29✔
1072
        }
1073
        return retval;
1✔
1074
    }
×
1075

1076
    // Scan one row of a Bro log using the field definitions in
    // `blf_field_defs`.
    //
    // The row in `sbr` is split on the first character of
    // `blf_separator`.  A row whose first cell is "#close" marks the
    // last logline in `dst` as ignored and reports a match.  For other
    // rows the cells are read by column name:
    //   - TS               -> the row's timestamp
    //   - bro_status_code  -> LEVEL_ERROR when the first character is
    //                         '4' or greater
    //   - bro_uid          -> operation id
    //   - bro_id_orig_h    -> used as the opid description
    //   - DURATION         -> opid duration, read as seconds
    // Cells of INTEGER/FLOAT columns with a numeric index are added to
    // `sbc.sbc_value_stats`.  Cells beyond the known columns are
    // ignored.
    //
    // Returns `scan_match{2000}` when exactly one timestamp cell
    // parsed (or on "#close"), otherwise `scan_no_match`.  `li` is not
    // used.
    scan_result_t scan_int(std::vector<logline>& dst,
                           const line_info& li,
                           shared_buffer_ref& sbr,
                           scan_batch_context& sbc)
    {
        static const intern_string_t STATUS_CODE
            = intern_string::lookup("bro_status_code");
        static const intern_string_t UID = intern_string::lookup("bro_uid");
        static const intern_string_t ID_ORIG_H
            = intern_string::lookup("bro_id_orig_h");

        separated_string ss(sbr.to_string_fragment());
        // Only read when `found_ts == 1`, but start from a defined
        // value anyway.
        timeval tv{};
        exttm tm;
        size_t found_ts = 0;
        log_level_t level = LEVEL_INFO;
        uint64_t opid_bloom = 0;
        auto opid_cap = string_fragment::invalid();
        auto host_cap = string_fragment::invalid();
        auto duration = std::chrono::microseconds{0};

        sbc.sbc_value_stats.resize(this->blf_field_defs.size());
        ss.with_separator(this->blf_separator.get()[0]);

        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
            if (iter.index() == 0 && *iter == "#close"_frag) {
                dst.back().set_ignore(true);
                return scan_match{2000};
            }

            if (iter.index() >= this->blf_field_defs.size()) {
                break;
            }

            const auto& fd = this->blf_field_defs[iter.index()];

            if (TS == fd.fd_meta.lvm_name) {
                static const char* const TIME_FMT[] = {"%s.%f"};
                const auto sf = *iter;

                if (this->lf_date_time.scan(
                        sf.data(), sf.length(), TIME_FMT, &tm, tv))
                {
                    this->lf_timestamp_flags = tm.et_flags;
                    found_ts += 1;
                }
            } else if (STATUS_CODE == fd.fd_meta.lvm_name) {
                const auto sf = *iter;

                if (!sf.empty() && sf[0] >= '4') {
                    level = LEVEL_ERROR;
                }
            } else if (UID == fd.fd_meta.lvm_name) {
                opid_cap = *iter;

                opid_bloom = opid_cap.bloom_bits();
            } else if (ID_ORIG_H == fd.fd_meta.lvm_name) {
                host_cap = *iter;
            } else if (DURATION == fd.fd_meta.lvm_name) {
                const auto sf = *iter;
                // Parse the cell text itself.  The previous
                // `scn::scan<double>("{}", text)` call passed the
                // literal "{}" as the input and the cell as the
                // format, so it never produced a value.
                auto scan_res = scn::scan_value<double>(sf.to_string_view());
                if (scan_res) {
                    duration = std::chrono::microseconds{
                        static_cast<long long>(scan_res->value() * 1000000)};
                }
            }

            if (fd.fd_numeric_index) {
                switch (fd.fd_meta.lvm_kind) {
                    case value_kind_t::VALUE_INTEGER:
                    case value_kind_t::VALUE_FLOAT: {
                        const auto sv = (*iter).to_string_view();
                        auto scan_float_res = scn::scan_value<double>(sv);
                        if (scan_float_res) {
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
                                .add_value(scan_float_res->value());
                        }
                        break;
                    }
                    default:
                        break;
                }
            }
        }

        if (found_ts == 1) {
            auto log_us = to_us(tv);
            if (!this->lf_specialized) {
                // First data row for a not-yet-specialized format:
                // stamp every line collected so far with this row's
                // time and mark it ignored.  The current row is
                // un-ignored again below.
                for (auto& ll : dst) {
                    ll.set_time(log_us);
                    ll.set_ignore(true);
                }
            }

            if (opid_cap.is_valid()) {
                auto opid_iter = sbc.sbc_opids.insert_op(
                    sbc.sbc_allocator,
                    opid_cap,
                    log_us,
                    this->lf_timestamp_point_of_reference,
                    duration);
                opid_iter->second.otr_level_stats.update_msg_count(level);

                auto& otr = opid_iter->second;
                if (!otr.otr_description.lod_index && host_cap.is_valid()
                    && otr.otr_description.lod_elements.empty())
                {
                    otr.otr_description.lod_index = 0;
                    otr.otr_description.lod_elements.insert(
                        0, host_cap.to_string());
                }
            }

            auto& ll = dst.back();
            ll.set_time(log_us);
            ll.set_level(level);
            ll.set_ignore(false);
            ll.merge_bloom_bits(opid_bloom);
            return scan_match{2000};
        }
        // Reached when the row did not yield exactly one parsed
        // timestamp cell.
        return scan_no_match{"no header found"};
    }
1198

1199
    scan_result_t scan(logfile& lf,
14,797✔
1200
                       std::vector<logline>& dst,
1201
                       const line_info& li,
1202
                       shared_buffer_ref& sbr,
1203
                       scan_batch_context& sbc) override
1204
    {
1205
        static const auto SEP_RE
1206
            = lnav::pcre2pp::code::from_const(R"(^#separator\s+(.+))");
14,797✔
1207

1208
        if (dst.size() == 1) {
14,797✔
1209
            auto file_options = lf.get_file_options();
1,465✔
1210

1211
            if (file_options) {
1,465✔
1212
                this->lf_date_time.dts_default_zone
1213
                    = file_options->second.fo_default_zone.pp_value;
60✔
1214
            } else {
1215
                this->lf_date_time.dts_default_zone = nullptr;
1,405✔
1216
            }
1217
        }
1,465✔
1218

1219
        if (!this->blf_format_name.empty()) {
14,797✔
1220
            return this->scan_int(dst, li, sbr, sbc);
4,938✔
1221
        }
1222

1223
        if (dst.size() <= 2 || dst.size() > 20 || sbr.empty()
17,443✔
1224
            || sbr.get_data()[0] == '#')
17,443✔
1225
        {
1226
            return scan_no_match{"no header found"};
6,864✔
1227
        }
1228

1229
        auto line_iter = dst.begin();
2,995✔
1230
        auto read_result = lf.read_raw_message(line_iter);
2,995✔
1231

1232
        if (read_result.isErr()) {
2,995✔
1233
            return scan_no_match{"unable to read first line"};
×
1234
        }
1235

1236
        auto line = read_result.unwrap();
2,995✔
1237
        auto md = SEP_RE.create_match_data();
2,995✔
1238

1239
        auto match_res = SEP_RE.capture_from(line.to_string_fragment())
2,995✔
1240
                             .into(md)
2,995✔
1241
                             .matches(PCRE2_NO_UTF_CHECK)
5,990✔
1242
                             .ignore_error();
2,995✔
1243
        if (!match_res) {
2,995✔
1244
            return scan_no_match{"cannot read separator header"};
2,970✔
1245
        }
1246

1247
        this->clear();
25✔
1248

1249
        auto sep = from_escaped_string(md[1]->data(), md[1]->length());
25✔
1250
        this->blf_separator = intern_string::lookup(sep);
25✔
1251

1252
        for (++line_iter; line_iter != dst.end(); ++line_iter) {
225✔
1253
            if (line_iter->get_sub_offset() != 0) {
200✔
1254
                continue;
25✔
1255
            }
1256
            auto next_read_result = lf.read_raw_message(line_iter);
200✔
1257

1258
            if (next_read_result.isErr()) {
200✔
1259
                return scan_no_match{"unable to read header line"};
×
1260
            }
1261

1262
            line = next_read_result.unwrap();
200✔
1263
            separated_string ss(line.to_string_fragment());
200✔
1264

1265
            ss.with_separator(this->blf_separator.get()[0]);
200✔
1266
            auto iter = ss.begin();
200✔
1267

1268
            string_fragment directive = *iter;
200✔
1269

1270
            if (directive.empty() || directive[0] != '#') {
200✔
1271
                continue;
25✔
1272
            }
1273

1274
            ++iter;
175✔
1275
            if (iter == ss.end()) {
175✔
1276
                continue;
×
1277
            }
1278

1279
            if (directive == "#set_separator") {
175✔
1280
                this->blf_set_separator = intern_string::lookup(*iter);
25✔
1281
            } else if (directive == "#empty_field") {
150✔
1282
                this->blf_empty_field = intern_string::lookup(*iter);
25✔
1283
            } else if (directive == "#unset_field") {
125✔
1284
                this->blf_unset_field = intern_string::lookup(*iter);
25✔
1285
            } else if (directive == "#path") {
100✔
1286
                auto full_name = fmt::format(FMT_STRING("bro_{}_log"), *iter);
75✔
1287
                this->blf_format_name = intern_string::lookup(full_name);
25✔
1288
            } else if (directive == "#fields" && this->blf_field_defs.empty()) {
100✔
1289
                do {
1290
                    auto field_name
1291
                        = intern_string::lookup("bro_" + sql_safe_ident(*iter));
709✔
1292
                    auto common_iter = FIELD_META.find(field_name);
709✔
1293
                    if (common_iter == FIELD_META.end()) {
709✔
1294
                        FIELD_META.emplace(field_name,
703✔
1295
                                           logline_value_meta{
1,406✔
1296
                                               field_name,
1297
                                               value_kind_t::VALUE_TEXT,
1298
                                           });
1299
                    }
1300
                    this->blf_field_defs.emplace_back(
1,418✔
1301
                        field_name, this->blf_field_defs.size(), this);
709✔
1302
                    ++iter;
709✔
1303
                } while (iter != ss.end());
709✔
1304
            } else if (directive == "#types") {
50✔
1305
                static const char* KNOWN_IDS[] = {
1306
                    "bro_conn_uids",
1307
                    "bro_fuid",
1308
                    "bro_host",
1309
                    "bro_info_code",
1310
                    "bro_method",
1311
                    "bro_mime_type",
1312
                    "bro_orig_fuids",
1313
                    "bro_parent_fuid",
1314
                    "bro_proto",
1315
                    "bro_referrer",
1316
                    "bro_resp_fuids",
1317
                    "bro_service",
1318
                    "bro_uid",
1319
                    "bro_uri",
1320
                    "bro_user_agent",
1321
                    "bro_username",
1322
                };
1323
                static const char* KNOWN_FOREIGN[] = {
1324
                    "bro_status_code",
1325
                };
1326

1327
                int numeric_count = 0;
25✔
1328

1329
                do {
1330
                    string_fragment field_type = *iter;
709✔
1331
                    auto& fd = this->blf_field_defs[iter.index() - 1];
709✔
1332

1333
                    if (field_type == "time") {
709✔
1334
                        fd.with_kind(value_kind_t::VALUE_TIMESTAMP);
50✔
1335
                    } else if (field_type == "string") {
684✔
1336
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
522✔
1337
                                                        std::end(KNOWN_IDS),
1338
                                                        fd.fd_meta.lvm_name);
261✔
1339
                        fd.with_kind(value_kind_t::VALUE_TEXT, ident);
522✔
1340
                    } else if (field_type == "count") {
423✔
1341
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
258✔
1342
                                                        std::end(KNOWN_IDS),
1343
                                                        fd.fd_meta.lvm_name);
129✔
1344
                        bool foreign
1345
                            = std::binary_search(std::begin(KNOWN_FOREIGN),
258✔
1346
                                                 std::end(KNOWN_FOREIGN),
1347
                                                 fd.fd_meta.lvm_name);
129✔
1348
                        fd.with_kind(
258✔
1349
                              value_kind_t::VALUE_INTEGER, ident, foreign)
1350
                            .with_numeric_index(numeric_count);
129✔
1351
                        numeric_count += 1;
129✔
1352
                    } else if (field_type == "bool") {
294✔
1353
                        fd.with_kind(value_kind_t::VALUE_BOOLEAN);
8✔
1354
                    } else if (field_type == "addr") {
290✔
1355
                        fd.with_kind(
100✔
1356
                            value_kind_t::VALUE_TEXT, true, false, "ipaddress");
1357
                    } else if (field_type == "port") {
240✔
1358
                        fd.with_kind(value_kind_t::VALUE_INTEGER, true);
100✔
1359
                    } else if (field_type == "interval") {
190✔
1360
                        fd.with_kind(value_kind_t::VALUE_FLOAT)
4✔
1361
                            .with_numeric_index(numeric_count);
2✔
1362
                        numeric_count += 1;
2✔
1363
                    }
1364

1365
                    ++iter;
709✔
1366
                } while (iter != ss.end());
709✔
1367
            }
1368
        }
200✔
1369

1370
        if (!this->blf_format_name.empty() && !this->blf_separator.empty()
50✔
1371
            && !this->blf_field_defs.empty())
50✔
1372
        {
1373
            return this->scan_int(dst, li, sbr, sbc);
25✔
1374
        }
1375

1376
        this->blf_format_name.clear();
×
1377

1378
        return scan_no_match{"no header found"};
×
1379
    }
2,995✔
1380

1381
    void annotate(logfile* lf,
30,358✔
1382
                  uint64_t line_number,
1383
                  string_attrs_t& sa,
1384
                  logline_value_vector& values) const override
1385
    {
1386
        static const intern_string_t UID = intern_string::lookup("bro_uid");
30,398✔
1387

1388
        auto& sbr = values.lvv_sbr;
30,358✔
1389
        separated_string ss(sbr.to_string_fragment());
30,358✔
1390

1391
        ss.with_separator(this->blf_separator.get()[0]);
30,358✔
1392

1393
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
681,188✔
1394
            if (iter.index() >= this->blf_field_defs.size()) {
650,830✔
1395
                return;
×
1396
            }
1397

1398
            const field_def& fd = this->blf_field_defs[iter.index()];
650,830✔
1399
            string_fragment sf = *iter;
650,830✔
1400

1401
            if (sf == this->blf_empty_field) {
650,830✔
1402
                sf.clear();
30,358✔
1403
            } else if (sf == this->blf_unset_field) {
620,472✔
1404
                sf.invalidate();
74,315✔
1405
            }
1406

1407
            auto lr = line_range(sf.sf_begin, sf.sf_end);
650,830✔
1408

1409
            if (fd.fd_meta.lvm_name == TS) {
650,830✔
1410
                sa.emplace_back(lr, L_TIMESTAMP.value());
30,358✔
1411
            } else if (fd.fd_meta.lvm_name == UID) {
620,472✔
1412
                sa.emplace_back(lr, L_OPID.value());
30,358✔
1413
                values.lvv_opid_value = sf.to_string();
30,358✔
1414
                values.lvv_opid_provenance
1415
                    = logline_value_vector::opid_provenance::file;
30,358✔
1416
            }
1417

1418
            if (lr.is_valid()) {
650,830✔
1419
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
576,515✔
1420
            } else {
1421
                values.lvv_values.emplace_back(fd.fd_meta);
74,315✔
1422
            }
1423
            values.lvv_values.back().lv_meta.lvm_user_hidden
650,830✔
1424
                = fd.fd_root_meta->lvm_user_hidden;
650,830✔
1425
        }
1426

1427
        log_format::annotate(lf, line_number, sa, values);
30,358✔
1428
    }
1429

1430
    std::optional<size_t> stats_index_for_value(
36✔
1431
        const intern_string_t& name) const override
1432
    {
1433
        for (const auto& blf_field_def : this->blf_field_defs) {
540✔
1434
            if (blf_field_def.fd_meta.lvm_name == name) {
540✔
1435
                if (!blf_field_def.fd_numeric_index) {
36✔
1436
                    break;
×
1437
                }
1438
                return blf_field_def.fd_numeric_index.value();
36✔
1439
            }
1440
        }
1441

1442
        return std::nullopt;
×
1443
    }
1444

1445
    /**
     * Set the user-hidden flag on a field's shared metadata.
     *
     * @param field_name The field to change; LOG_TIME_STR is treated as an
     *   alias for the TS field.
     * @param val The new value of the hidden flag.
     * @return true if the field exists in FIELD_META and was updated,
     *   false otherwise.
     */
    bool hide_field(intern_string_t field_name, bool val) override
    {
        const auto lookup_name = (field_name == LOG_TIME_STR) ? TS : field_name;
        const auto found = FIELD_META.find(lookup_name);

        if (found != FIELD_META.end()) {
            found->second.lvm_user_hidden = val;
            return true;
        }

        return false;
    }
1460

1461
    std::map<intern_string_t, logline_value_meta> get_field_states() override
97✔
1462
    {
1463
        std::map<intern_string_t, logline_value_meta> retval;
97✔
1464

1465
        for (const auto& fd : FIELD_META) {
213✔
1466
            retval.emplace(fd.first, fd.second);
116✔
1467
        }
1468

1469
        return retval;
97✔
1470
    }
×
1471

1472
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
25✔
1473
    {
1474
        auto retval = std::make_shared<bro_log_format>(*this);
25✔
1475

1476
        retval->lf_specialized = true;
25✔
1477
        for (auto& fd : retval->blf_field_defs) {
734✔
1478
            fd.fd_meta.lvm_format = retval.get();
709✔
1479
        }
1480
        return retval;
50✔
1481
    }
25✔
1482

1483
    class bro_log_table : public log_format_vtab_impl {
1484
    public:
1485
        explicit bro_log_table(std::shared_ptr<const log_format> format)
23✔
1486
            : log_format_vtab_impl(format),
23✔
1487
              blt_format(dynamic_cast<const bro_log_format*>(format.get()))
23✔
1488
        {
1489
        }
23✔
1490

1491
        void get_columns(std::vector<vtab_column>& cols) const override
32✔
1492
        {
1493
            for (const auto& fd : this->blt_format->blf_field_defs) {
944✔
1494
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
912✔
1495
                    fd.fd_meta.lvm_kind);
912✔
1496

1497
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
912✔
1498
                                  type_pair.first,
1499
                                  fd.fd_collator,
912✔
1500
                                  false,
1,824✔
1501
                                  "",
1502
                                  type_pair.second);
1503
            }
1504
        }
32✔
1505

1506
        void get_foreign_keys(
11✔
1507
            std::unordered_set<std::string>& keys_inout) const override
1508
        {
1509
            this->log_vtab_impl::get_foreign_keys(keys_inout);
11✔
1510

1511
            for (const auto& fd : this->blt_format->blf_field_defs) {
322✔
1512
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
311✔
1513
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
136✔
1514
                }
1515
            }
1516
        }
11✔
1517

1518
        const bro_log_format* blt_format;
1519
    };
1520

1521
    static std::map<intern_string_t, std::shared_ptr<bro_log_table>>&
1522
    get_tables()
23✔
1523
    {
1524
        static std::map<intern_string_t, std::shared_ptr<bro_log_table>> retval;
23✔
1525

1526
        return retval;
23✔
1527
    }
1528

1529
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
836✔
1530
    {
1531
        if (this->blf_format_name.empty()) {
836✔
1532
            return nullptr;
813✔
1533
        }
1534

1535
        std::shared_ptr<bro_log_table> retval = nullptr;
23✔
1536

1537
        auto& tables = get_tables();
23✔
1538
        const auto iter = tables.find(this->blf_format_name);
23✔
1539
        if (iter == tables.end()) {
23✔
1540
            retval = std::make_shared<bro_log_table>(this->shared_from_this());
23✔
1541
            tables[this->blf_format_name] = retval;
23✔
1542
        }
1543

1544
        return retval;
23✔
1545
    }
23✔
1546

1547
    /**
     * No-op override: the body is empty, so none of the arguments
     * (including sbr) are read or modified here.
     */
    void get_subline(const log_format_file_state& lffs,
30,565✔
1548
                     const logline& ll,
1549
                     shared_buffer_ref& sbr,
1550
                     subline_options opts) override
1551
    {
1552
    }
30,565✔
1553

1554
    // Name of the detected log; while empty, get_vtab_impl() returns
    // nullptr.  Also the key used for the table cache in get_tables().
    intern_string_t blf_format_name;
1555
    // Column separator; annotate() uses only its first character.
    intern_string_t blf_separator;
1556
    // NOTE(review): presumably the separator for set-valued cells; it is not
    // used in the code visible here -- confirm against the header parser.
    intern_string_t blf_set_separator;
1557
    // Cell text that annotate() treats as an empty (cleared) value.
    intern_string_t blf_empty_field;
1558
    // Cell text that annotate() treats as an unset (invalidated) value.
    intern_string_t blf_unset_field;
1559
    // One definition per column, indexed by column position.
    std::vector<field_def> blf_field_defs;
1560
};
1561

1562
// Out-of-class definition of the static per-field metadata map; hide_field()
// updates lvm_user_hidden on its entries and get_field_states() copies it.
std::unordered_map<const intern_string_t, logline_value_meta>
1563
    bro_log_format::FIELD_META;
1564

1565
// Name of the timestamp field; annotate() tags the matching column with
// L_TIMESTAMP and hide_field() maps LOG_TIME_STR onto it.
const intern_string_t bro_log_format::TS = intern_string::lookup("bro_ts");
1566
// Interned name "bro_duration"; not referenced in the code visible here.
const intern_string_t bro_log_format::DURATION
1567
    = intern_string::lookup("bro_duration");
1568

1569
struct ws_separated_string {
1570
    const char* ss_str;
1571
    size_t ss_len;
1572

1573
    explicit ws_separated_string(const char* str = nullptr, size_t len = -1)
29,904✔
1574
        : ss_str(str), ss_len(len)
29,904✔
1575
    {
1576
    }
29,904✔
1577

1578
    struct iterator {
1579
        enum class state_t {
1580
            NORMAL,
1581
            QUOTED,
1582
        };
1583

1584
        const ws_separated_string& i_parent;
1585
        const char* i_pos;
1586
        const char* i_next_pos;
1587
        size_t i_index{0};
1588
        state_t i_state{state_t::NORMAL};
1589

1590
        iterator(const ws_separated_string& ss, const char* pos)
78,892✔
1591
            : i_parent(ss), i_pos(pos), i_next_pos(pos)
78,892✔
1592
        {
1593
            this->update();
78,892✔
1594
        }
78,892✔
1595

1596
        void update()
125,099✔
1597
        {
1598
            const auto& ss = this->i_parent;
125,099✔
1599
            bool done = false;
125,099✔
1600

1601
            while (!done && this->i_next_pos < (ss.ss_str + ss.ss_len)) {
2,642,794✔
1602
                switch (this->i_state) {
2,517,695✔
1603
                    case state_t::NORMAL:
1,037,384✔
1604
                        if (*this->i_next_pos == '"') {
1,037,384✔
1605
                            this->i_state = state_t::QUOTED;
68,173✔
1606
                        } else if (isspace(*this->i_next_pos)) {
969,211✔
1607
                            done = true;
65,465✔
1608
                        }
1609
                        break;
1,037,384✔
1610
                    case state_t::QUOTED:
1,480,311✔
1611
                        if (*this->i_next_pos == '"') {
1,480,311✔
1612
                            this->i_state = state_t::NORMAL;
68,160✔
1613
                        }
1614
                        break;
1,480,311✔
1615
                }
1616
                if (!done) {
2,517,695✔
1617
                    this->i_next_pos += 1;
2,452,230✔
1618
                }
1619
            }
1620
        }
125,099✔
1621

1622
        iterator& operator++()
46,207✔
1623
        {
1624
            const auto& ss = this->i_parent;
46,207✔
1625

1626
            this->i_pos = this->i_next_pos;
46,207✔
1627
            while (this->i_pos < (ss.ss_str + ss.ss_len)
46,207✔
1628
                   && isspace(*this->i_pos))
89,800✔
1629
            {
1630
                this->i_pos += 1;
43,593✔
1631
                this->i_next_pos += 1;
43,593✔
1632
            }
1633
            this->update();
46,207✔
1634
            this->i_index += 1;
46,207✔
1635

1636
            return *this;
46,207✔
1637
        }
1638

1639
        string_fragment operator*()
70,616✔
1640
        {
1641
            const auto& ss = this->i_parent;
70,616✔
1642
            int end = this->i_next_pos - ss.ss_str;
70,616✔
1643

1644
            return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end);
70,616✔
1645
        }
1646

1647
        bool operator==(const iterator& other) const
48,988✔
1648
        {
1649
            return (&this->i_parent == &other.i_parent)
48,988✔
1650
                && (this->i_pos == other.i_pos);
48,988✔
1651
        }
1652

1653
        bool operator!=(const iterator& other) const
46,065✔
1654
        {
1655
            return !(*this == other);
46,065✔
1656
        }
1657

1658
        size_t index() const { return this->i_index; }
86,567✔
1659
    };
1660

1661
    iterator begin() { return {*this, this->ss_str}; }
29,904✔
1662

1663
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
48,988✔
1664
};
1665

1666
class w3c_log_format : public log_format {
1667
public:
1668
    // Interned field names that scan_int() compares against
    // field_def::fd_name to find the date and time columns.  Declared
    // here without an initializer.
    static const intern_string_t F_DATE;
1669
    static const intern_string_t F_TIME;
1670

1671
    struct field_def {
1672
        const intern_string_t fd_name;
1673
        logline_value_meta fd_meta;
1674
        logline_value_meta* fd_root_meta{nullptr};
1675
        std::string fd_collator;
1676
        std::optional<size_t> fd_numeric_index;
1677

1678
        explicit field_def(const intern_string_t name)
22✔
1679
            : fd_name(name), fd_meta(intern_string::lookup(sql_safe_ident(
44✔
1680
                                         name.to_string_fragment())),
44✔
1681
                                     value_kind_t::VALUE_TEXT)
22✔
1682
        {
1683
        }
22✔
1684

1685
        field_def(const intern_string_t name, logline_value_meta meta)
71✔
1686
            : fd_name(name), fd_meta(meta)
71✔
1687
        {
1688
        }
71✔
1689

1690
        field_def(size_t col,
12,512✔
1691
                  const char* name,
1692
                  value_kind_t kind,
1693
                  bool ident = false,
1694
                  bool foreign_key = false,
1695
                  std::string coll = "")
1696
            : fd_name(intern_string::lookup(name)),
25,024✔
1697
              fd_meta(
25,024✔
1698
                  intern_string::lookup(sql_safe_ident(string_fragment(name))),
25,024✔
1699
                  kind,
1700
                  logline_value_meta::table_column{col}),
12,512✔
1701
              fd_collator(std::move(coll))
12,512✔
1702
        {
1703
            this->fd_meta.lvm_identifier = ident;
12,512✔
1704
            this->fd_meta.lvm_foreign_key = foreign_key;
12,512✔
1705
        }
12,512✔
1706

1707
        field_def& with_kind(value_kind_t kind,
1708
                             bool identifier = false,
1709
                             const std::string& collator = "")
1710
        {
1711
            this->fd_meta.lvm_kind = kind;
1712
            this->fd_meta.lvm_identifier = identifier;
1713
            this->fd_collator = collator;
1714
            return *this;
1715
        }
1716

1717
        field_def& with_numeric_index(int index)
51✔
1718
        {
1719
            this->fd_numeric_index = index;
51✔
1720
            return *this;
51✔
1721
        }
1722
    };
1723

1724
    // Metadata shared between field definitions, keyed by the metadata
    // name.  scan() adds an entry the first time a known field or a
    // date/time field is seen and points fd_root_meta at it.
    static std::unordered_map<const intern_string_t, logline_value_meta>
1725
        FIELD_META;
1726

1727
    struct field_to_struct_t {
1728
        field_to_struct_t(const char* prefix, const char* struct_name)
3,128✔
1729
            : fs_prefix(prefix),
3,128✔
1730
              fs_struct_name(intern_string::lookup(struct_name))
6,256✔
1731
        {
1732
        }
3,128✔
1733

1734
        const char* fs_prefix;
1735
        intern_string_t fs_struct_name;
1736
    };
1737

1738
    static const std::array<field_def, 16>& get_known_fields()
799✔
1739
    {
1740
        static size_t KNOWN_FIELD_INDEX = 0;
1741
        static const std::array<field_def, 16> RETVAL = {
1742
            field_def{
1743
                KNOWN_FIELD_INDEX++,
1744
                "cs-method",
1745
                value_kind_t::VALUE_TEXT,
1746
                true,
1747
            },
1748
            {
1749
                KNOWN_FIELD_INDEX++,
1750
                "c-ip",
1751
                value_kind_t::VALUE_TEXT,
1752
                true,
1753
                false,
1754
                "ipaddress",
1755
            },
1756
            {
1757
                KNOWN_FIELD_INDEX++,
1758
                "cs-bytes",
1759
                value_kind_t::VALUE_INTEGER,
1760
                false,
1761
            },
1762
            {
1763
                KNOWN_FIELD_INDEX++,
1764
                "cs-host",
1765
                value_kind_t::VALUE_TEXT,
1766
                true,
1767
            },
1768
            {
1769
                KNOWN_FIELD_INDEX++,
1770
                "cs-uri-stem",
1771
                value_kind_t::VALUE_TEXT,
1772
                true,
1773
                false,
1774
                "naturalnocase",
1775
            },
1776
            {
1777
                KNOWN_FIELD_INDEX++,
1778
                "cs-uri-query",
1779
                value_kind_t::VALUE_TEXT,
1780
                false,
1781
            },
1782
            {
1783
                KNOWN_FIELD_INDEX++,
1784
                "cs-username",
1785
                value_kind_t::VALUE_TEXT,
1786
                false,
1787
            },
1788
            {
1789
                KNOWN_FIELD_INDEX++,
1790
                "cs-version",
1791
                value_kind_t::VALUE_TEXT,
1792
                true,
1793
            },
1794
            {
1795
                KNOWN_FIELD_INDEX++,
1796
                "s-ip",
1797
                value_kind_t::VALUE_TEXT,
1798
                true,
1799
                false,
1800
                "ipaddress",
1801
            },
1802
            {
1803
                KNOWN_FIELD_INDEX++,
1804
                "s-port",
1805
                value_kind_t::VALUE_INTEGER,
1806
                true,
1807
            },
1808
            {
1809
                KNOWN_FIELD_INDEX++,
1810
                "s-computername",
1811
                value_kind_t::VALUE_TEXT,
1812
                true,
1813
            },
1814
            {
1815
                KNOWN_FIELD_INDEX++,
1816
                "s-sitename",
1817
                value_kind_t::VALUE_TEXT,
1818
                true,
1819
            },
1820
            {
1821
                KNOWN_FIELD_INDEX++,
1822
                "sc-bytes",
1823
                value_kind_t::VALUE_INTEGER,
1824
                false,
1825
            },
1826
            {
1827
                KNOWN_FIELD_INDEX++,
1828
                "sc-status",
1829
                value_kind_t::VALUE_INTEGER,
1830
                false,
1831
                true,
1832
            },
1833
            {
1834
                KNOWN_FIELD_INDEX++,
1835
                "sc-substatus",
1836
                value_kind_t::VALUE_INTEGER,
1837
                false,
1838
            },
1839
            {
1840
                KNOWN_FIELD_INDEX++,
1841
                "time-taken",
1842
                value_kind_t::VALUE_FLOAT,
1843
                false,
1844
            },
1845
        };
2,363✔
1846

1847
        return RETVAL;
799✔
1848
    }
1849

1850
    static const std::array<field_to_struct_t, 4>& get_known_struct_fields()
796✔
1851
    {
1852
        static const std::array<field_to_struct_t, 4> RETVAL = {
1853
            field_to_struct_t{"cs(", "cs_headers"},
1854
            {"sc(", "sc_headers"},
1855
            {"rs(", "rs_headers"},
1856
            {"sr(", "sr_headers"},
1857
        };
796✔
1858

1859
        return RETVAL;
796✔
1860
    }
1861

1862
    w3c_log_format()
934✔
1863
    {
934✔
1864
        this->lf_multiline = false;
934✔
1865
        this->lf_is_self_describing = true;
934✔
1866
        this->lf_time_ordered = false;
934✔
1867
        this->lf_structured = true;
934✔
1868
    }
934✔
1869

1870
    /**
     * @return The detected format name, or "w3c_log" while no name has
     *   been set.
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t DEFAULT_NAME(
            intern_string::lookup("w3c_log"));

        if (this->wlf_format_name.empty()) {
            return DEFAULT_NAME;
        }

        return this->wlf_format_name;
    }
1876

1877
    void clear() override
18,777✔
1878
    {
1879
        this->log_format::clear();
18,777✔
1880
        this->wlf_time_scanner.clear();
18,777✔
1881
        this->wlf_format_name.clear();
18,777✔
1882
        this->wlf_field_defs.clear();
18,777✔
1883
    }
18,777✔
1884

1885
    std::vector<logline_value_meta> get_value_metadata() const override
×
1886
    {
1887
        std::vector<logline_value_meta> retval;
×
1888

1889
        for (const auto& fd : this->wlf_field_defs) {
×
1890
            retval.emplace_back(fd.fd_meta);
×
1891
        }
1892
        return retval;
×
1893
    }
×
1894

1895
    scan_result_t scan_int(std::vector<logline>& dst,
1,315✔
1896
                           const line_info& li,
1897
                           shared_buffer_ref& sbr,
1898
                           scan_batch_context& sbc)
1899
    {
1900
        static const intern_string_t F_DATE_LOCAL
1901
            = intern_string::lookup("date-local");
1,349✔
1902
        static const intern_string_t F_DATE_UTC
1903
            = intern_string::lookup("date-UTC");
1,349✔
1904
        static const intern_string_t F_TIME_LOCAL
1905
            = intern_string::lookup("time-local");
1,349✔
1906
        static const intern_string_t F_TIME_UTC
1907
            = intern_string::lookup("time-UTC");
1,349✔
1908
        static const intern_string_t F_STATUS_CODE
1909
            = intern_string::lookup("sc-status");
1,349✔
1910

1911
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,315✔
1912
        timeval date_tv{0, 0}, time_tv{0, 0};
1,315✔
1913
        exttm date_tm, time_tm;
1,315✔
1914
        size_t found_date = 0;
1,315✔
1915
        size_t found_time = 0;
1,315✔
1916
        log_level_t level = LEVEL_INFO;
1,315✔
1917

1918
        sbc.sbc_value_stats.resize(this->wlf_field_defs.size());
1,315✔
1919
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
19,693✔
1920
            if (iter.index() >= this->wlf_field_defs.size()) {
18,588✔
1921
                level = LEVEL_INVALID;
1✔
1922
                break;
1✔
1923
            }
1924

1925
            const auto& fd = this->wlf_field_defs[iter.index()];
18,587✔
1926
            string_fragment sf = *iter;
18,587✔
1927

1928
            if (sf.startswith("#")) {
18,587✔
1929
                if (sf == "#Date:") {
209✔
1930
                    auto sbr_sf_opt
1931
                        = sbr.to_string_fragment().consume_n(sf.length());
53✔
1932

1933
                    if (sbr_sf_opt) {
53✔
1934
                        auto sbr_sf = sbr_sf_opt.value().trim();
53✔
1935
                        date_time_scanner dts;
53✔
1936
                        exttm tm;
53✔
1937
                        timeval tv;
1938

1939
                        if (dts.scan(sbr_sf.data(),
53✔
1940
                                     sbr_sf.length(),
53✔
1941
                                     nullptr,
1942
                                     &tm,
1943
                                     tv))
1944
                        {
1945
                            this->lf_date_time.set_base_time(tv.tv_sec,
52✔
1946
                                                             tm.et_tm);
1947
                            this->wlf_time_scanner.set_base_time(tv.tv_sec,
52✔
1948
                                                                 tm.et_tm);
1949
                        }
1950
                    }
1951
                }
1952
                auto& ll = dst.back();
209✔
1953
                ll.set_level(LEVEL_UNKNOWN);
209✔
1954
                ll.set_ignore(true);
209✔
1955
                return scan_match{2000};
209✔
1956
            }
1957

1958
            sf = sf.trim("\" \t");
18,378✔
1959
            if (F_DATE == fd.fd_name || F_DATE_LOCAL == fd.fd_name
35,694✔
1960
                || F_DATE_UTC == fd.fd_name)
35,694✔
1961
            {
1962
                if (this->lf_date_time.scan(
1,070✔
1963
                        sf.data(), sf.length(), nullptr, &date_tm, date_tv))
1,070✔
1964
                {
1965
                    this->lf_timestamp_flags |= date_tm.et_flags;
1,069✔
1966
                    found_date += 1;
1,069✔
1967
                }
1968
            } else if (F_TIME == fd.fd_name || F_TIME_LOCAL == fd.fd_name
33,523✔
1969
                       || F_TIME_UTC == fd.fd_name)
33,523✔
1970
            {
1971
                if (this->wlf_time_scanner.scan(
1,101✔
1972
                        sf.data(), sf.length(), nullptr, &time_tm, time_tv))
1,101✔
1973
                {
1974
                    this->lf_timestamp_flags |= time_tm.et_flags;
1,101✔
1975
                    found_time += 1;
1,101✔
1976
                }
1977
            } else if (F_STATUS_CODE == fd.fd_name) {
16,207✔
1978
                if (!sf.empty() && sf[0] >= '4') {
1,098✔
1979
                    level = LEVEL_ERROR;
1,018✔
1980
                }
1981
            }
1982

1983
            if (fd.fd_numeric_index) {
18,378✔
1984
                switch (fd.fd_meta.lvm_kind) {
6,402✔
1985
                    case value_kind_t::VALUE_INTEGER:
6,402✔
1986
                    case value_kind_t::VALUE_FLOAT: {
1987
                        auto scan_float_res
1988
                            = scn::scan_value<double>(sf.to_string_view());
6,402✔
1989

1990
                        if (scan_float_res) {
6,402✔
1991
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
6,398✔
1992
                                .add_value(scan_float_res->value());
6,398✔
1993
                        }
1994
                        break;
6,402✔
1995
                    }
1996
                    default:
×
1997
                        break;
×
1998
                }
1999
            }
2000
        }
2001

2002
        if (found_time == 1 && found_date <= 1) {
1,106✔
2003
            auto tm = time_tm;
1,101✔
2004

2005
            if (found_date) {
1,101✔
2006
                tm.et_tm.tm_year = date_tm.et_tm.tm_year;
1,069✔
2007
                tm.et_tm.tm_mday = date_tm.et_tm.tm_mday;
1,069✔
2008
                tm.et_tm.tm_mon = date_tm.et_tm.tm_mon;
1,069✔
2009
                tm.et_tm.tm_wday = date_tm.et_tm.tm_wday;
1,069✔
2010
                tm.et_tm.tm_yday = date_tm.et_tm.tm_yday;
1,069✔
2011
            }
2012

2013
            auto tv = tm.to_timeval();
1,101✔
2014
            if (!this->lf_specialized) {
1,101✔
2015
                for (auto& ll : dst) {
84✔
2016
                    ll.set_time(tv);
70✔
2017
                    ll.set_ignore(true);
70✔
2018
                }
2019
            }
2020
            auto& ll = dst.back();
1,101✔
2021
            ll.set_time(tv);
1,101✔
2022
            ll.set_level(level);
1,101✔
2023
            ll.set_ignore(false);
1,101✔
2024
            return scan_match{2000};
1,101✔
2025
        }
2026

2027
        return scan_no_match{"no header found"};
5✔
2028
    }
2029

2030
    scan_result_t scan(logfile& lf,
14,821✔
2031
                       std::vector<logline>& dst,
2032
                       const line_info& li,
2033
                       shared_buffer_ref& sbr,
2034
                       scan_batch_context& sbc) override
2035
    {
2036
        static const auto* W3C_LOG_NAME = intern_string::lookup("w3c_log");
16,385✔
2037
        static const auto* X_FIELDS_NAME = intern_string::lookup("x_fields");
16,385✔
2038
        static const auto& KNOWN_FIELDS = get_known_fields();
14,821✔
2039
        static const auto& KNOWN_STRUCT_FIELDS = get_known_struct_fields();
14,821✔
2040
        static auto X_FIELDS_IDX = 0;
2041

2042
        if (li.li_partial) {
14,821✔
2043
            return scan_incomplete{};
26✔
2044
        }
2045

2046
        if (dst.size() == 1) {
14,795✔
2047
            auto file_options = lf.get_file_options();
1,461✔
2048

2049
            if (file_options) {
1,461✔
2050
                this->lf_date_time.dts_default_zone
2051
                    = file_options->second.fo_default_zone.pp_value;
60✔
2052
            } else {
2053
                this->lf_date_time.dts_default_zone = nullptr;
1,401✔
2054
            }
2055
        }
1,461✔
2056

2057
        if (!this->wlf_format_name.empty()) {
14,795✔
2058
            return this->scan_int(dst, li, sbr, sbc);
1,296✔
2059
        }
2060

2061
        if (dst.size() < 2 || dst.size() > 20 || sbr.empty()
25,537✔
2062
            || sbr.get_data()[0] == '#')
25,537✔
2063
        {
2064
            return scan_no_match{"no header found"};
9,518✔
2065
        }
2066

2067
        this->clear();
3,981✔
2068

2069
        for (auto line_iter = dst.begin(); line_iter != dst.end(); ++line_iter)
32,341✔
2070
        {
2071
            if (line_iter->get_sub_offset() != 0) {
28,360✔
2072
                continue;
25,473✔
2073
            }
2074
            auto next_read_result = lf.read_raw_message(line_iter);
27,123✔
2075

2076
            if (next_read_result.isErr()) {
27,123✔
2077
                return scan_no_match{"unable to read first line"};
×
2078
            }
2079

2080
            auto line = next_read_result.unwrap();
27,123✔
2081
            ws_separated_string ss(line.get_data(), line.length());
27,123✔
2082
            auto iter = ss.begin();
27,123✔
2083
            const auto directive = *iter;
27,123✔
2084

2085
            if (directive.empty() || directive[0] != '#') {
27,123✔
2086
                continue;
24,200✔
2087
            }
2088

2089
            ++iter;
2,923✔
2090
            if (iter == ss.end()) {
2,923✔
2091
                continue;
36✔
2092
            }
2093

2094
            if (directive == "#Date:") {
2,887✔
2095
                date_time_scanner dts;
12✔
2096
                struct exttm tm;
12✔
2097
                struct timeval tv;
2098

2099
                if (dts.scan(line.get_data_at(directive.length() + 1),
12✔
2100
                             line.length() - directive.length() - 1,
12✔
2101
                             nullptr,
2102
                             &tm,
2103
                             tv))
2104
                {
2105
                    this->lf_date_time.set_base_time(tv.tv_sec, tm.et_tm);
10✔
2106
                    this->wlf_time_scanner.set_base_time(tv.tv_sec, tm.et_tm);
10✔
2107
                }
2108
            } else if (directive == "#Fields:" && this->wlf_field_defs.empty())
2,875✔
2109
            {
2110
                int numeric_count = 0;
19✔
2111

2112
                do {
2113
                    auto sf = (*iter).trim(")");
210✔
2114

2115
                    auto field_iter = std::find_if(
630✔
2116
                        begin(KNOWN_FIELDS),
2117
                        end(KNOWN_FIELDS),
2118
                        [&sf](auto elem) { return sf == elem.fd_name; });
2,400✔
2119
                    if (field_iter != end(KNOWN_FIELDS)) {
420✔
2120
                        this->wlf_field_defs.emplace_back(*field_iter);
117✔
2121
                        auto& fd = this->wlf_field_defs.back();
117✔
2122
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
117✔
2123
                        if (common_iter == FIELD_META.end()) {
117✔
2124
                            auto emp_res = FIELD_META.emplace(
116✔
2125
                                fd.fd_meta.lvm_name, fd.fd_meta);
116✔
2126
                            common_iter = emp_res.first;
116✔
2127
                        }
2128
                        fd.fd_root_meta = &common_iter->second;
117✔
2129
                    } else if (sf.is_one_of("date", "time")) {
93✔
2130
                        this->wlf_field_defs.emplace_back(
44✔
2131
                            intern_string::lookup(sf));
22✔
2132
                        auto& fd = this->wlf_field_defs.back();
22✔
2133
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
22✔
2134
                        if (common_iter == FIELD_META.end()) {
22✔
2135
                            auto emp_res = FIELD_META.emplace(
21✔
2136
                                fd.fd_meta.lvm_name, fd.fd_meta);
21✔
2137
                            common_iter = emp_res.first;
21✔
2138
                        }
2139
                        fd.fd_root_meta = &common_iter->second;
22✔
2140
                    } else {
2141
                        const auto fs_iter = std::find_if(
213✔
2142
                            begin(KNOWN_STRUCT_FIELDS),
2143
                            end(KNOWN_STRUCT_FIELDS),
2144
                            [&sf](auto elem) {
221✔
2145
                                return sf.startswith(elem.fs_prefix);
221✔
2146
                            });
2147
                        if (fs_iter != end(KNOWN_STRUCT_FIELDS)) {
142✔
2148
                            const intern_string_t field_name
2149
                                = intern_string::lookup(sf.substr(3));
21✔
2150
                            this->wlf_field_defs.emplace_back(
21✔
2151
                                field_name,
2152
                                logline_value_meta(
42✔
2153
                                    field_name,
2154
                                    value_kind_t::VALUE_TEXT,
2155
                                    logline_value_meta::table_column{
×
2156
                                        KNOWN_FIELDS.size() + 1
21✔
2157
                                        + std::distance(
63✔
2158
                                            begin(KNOWN_STRUCT_FIELDS),
2159
                                            fs_iter)},
2160
                                    this)
42✔
2161
                                    .with_struct_name(fs_iter->fs_struct_name));
2162
                        } else {
2163
                            const intern_string_t field_name
2164
                                = intern_string::lookup(sf);
50✔
2165
                            this->wlf_field_defs.emplace_back(
50✔
2166
                                field_name,
2167
                                logline_value_meta(
100✔
2168
                                    field_name,
2169
                                    value_kind_t::VALUE_TEXT,
2170
                                    logline_value_meta::table_column{
×
2171
                                        KNOWN_FIELDS.size() + X_FIELDS_IDX},
100✔
2172
                                    this)
100✔
2173
                                    .with_struct_name(X_FIELDS_NAME));
2174
                        }
2175
                    }
2176
                    auto& fd = this->wlf_field_defs.back();
210✔
2177
                    fd.fd_meta.lvm_format = std::make_optional(this);
210✔
2178
                    switch (fd.fd_meta.lvm_kind) {
210✔
2179
                        case value_kind_t::VALUE_FLOAT:
51✔
2180
                        case value_kind_t::VALUE_INTEGER:
2181
                            fd.with_numeric_index(numeric_count);
51✔
2182
                            numeric_count += 1;
51✔
2183
                            break;
51✔
2184
                        default:
159✔
2185
                            break;
159✔
2186
                    }
2187

2188
                    ++iter;
210✔
2189
                } while (iter != ss.end());
210✔
2190

2191
                this->wlf_format_name = W3C_LOG_NAME;
19✔
2192
            }
2193
        }
51,359✔
2194

2195
        if (!this->wlf_format_name.empty() && !this->wlf_field_defs.empty()) {
3,981✔
2196
            return this->scan_int(dst, li, sbr, sbc);
19✔
2197
        }
2198

2199
        this->wlf_format_name.clear();
3,962✔
2200

2201
        return scan_no_match{"no header found"};
3,962✔
2202
    }
2203

2204
    void annotate(logfile* lf,
1,466✔
2205
                  uint64_t line_number,
2206
                  string_attrs_t& sa,
2207
                  logline_value_vector& values) const override
2208
    {
2209
        auto& sbr = values.lvv_sbr;
1,466✔
2210
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,466✔
2211
        std::optional<line_range> date_lr;
1,466✔
2212
        std::optional<line_range> time_lr;
1,466✔
2213

2214
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
26,162✔
2215
            auto sf = *iter;
24,696✔
2216

2217
            if (iter.index() >= this->wlf_field_defs.size()) {
24,696✔
2218
                sa.emplace_back(line_range{sf.sf_begin, -1},
×
2219
                                SA_INVALID.value("extra fields detected"s));
×
2220
                return;
×
2221
            }
2222

2223
            const auto& fd = this->wlf_field_defs[iter.index()];
24,696✔
2224

2225
            if (sf == "-") {
24,696✔
2226
                sf.invalidate();
4,300✔
2227
            }
2228

2229
            auto lr = line_range(sf.sf_begin, sf.sf_end);
24,696✔
2230

2231
            if (lr.is_valid()) {
24,696✔
2232
                if (fd.fd_meta.lvm_name == F_DATE) {
20,396✔
2233
                    date_lr = lr;
1,444✔
2234
                } else if (fd.fd_meta.lvm_name == F_TIME) {
18,952✔
2235
                    time_lr = lr;
1,458✔
2236
                }
2237
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
20,396✔
2238
                if (sf.startswith("\"")) {
20,396✔
2239
                    auto& meta = values.lvv_values.back().lv_meta;
28✔
2240

2241
                    if (meta.lvm_kind == value_kind_t::VALUE_TEXT) {
28✔
2242
                        meta.lvm_kind = value_kind_t::VALUE_W3C_QUOTED;
26✔
2243
                    } else {
2244
                        meta.lvm_kind = value_kind_t::VALUE_NULL;
2✔
2245
                    }
2246
                }
2247
            } else {
2248
                values.lvv_values.emplace_back(fd.fd_meta);
4,300✔
2249
            }
2250
            if (fd.fd_root_meta != nullptr) {
24,696✔
2251
                values.lvv_values.back().lv_meta.lvm_user_hidden
20,318✔
2252
                    = fd.fd_root_meta->lvm_user_hidden;
20,318✔
2253
            }
2254
        }
2255
        if (time_lr) {
1,466✔
2256
            auto ts_lr = time_lr.value();
1,458✔
2257
            if (date_lr) {
1,458✔
2258
                if (date_lr->lr_end + 1 == time_lr->lr_start) {
1,443✔
2259
                    ts_lr.lr_start = date_lr->lr_start;
1,442✔
2260
                    ts_lr.lr_end = time_lr->lr_end;
1,442✔
2261
                }
2262
            }
2263

2264
            sa.emplace_back(ts_lr, L_TIMESTAMP.value());
1,458✔
2265
        }
2266
        log_format::annotate(lf, line_number, sa, values);
1,466✔
2267
    }
2268

2269
    std::optional<size_t> stats_index_for_value(
×
2270
        const intern_string_t& name) const override
2271
    {
2272
        for (const auto& wlf_field_def : this->wlf_field_defs) {
×
2273
            if (wlf_field_def.fd_meta.lvm_name == name) {
×
2274
                if (!wlf_field_def.fd_numeric_index) {
×
2275
                    break;
×
2276
                }
2277
                return wlf_field_def.fd_numeric_index.value();
×
2278
            }
2279
        }
2280

2281
        return std::nullopt;
×
2282
    }
2283

2284
    bool hide_field(const intern_string_t field_name, bool val) override
×
2285
    {
2286
        if (field_name == LOG_TIME_STR) {
×
2287
            auto date_iter = FIELD_META.find(F_DATE);
×
2288
            auto time_iter = FIELD_META.find(F_TIME);
×
2289
            if (date_iter == FIELD_META.end() || time_iter == FIELD_META.end())
×
2290
            {
2291
                return false;
×
2292
            }
2293
            date_iter->second.lvm_user_hidden = val;
×
2294
            time_iter->second.lvm_user_hidden = val;
×
2295
            return true;
×
2296
        }
2297

2298
        auto fd_iter = FIELD_META.find(field_name);
×
2299
        if (fd_iter == FIELD_META.end()) {
×
2300
            return false;
×
2301
        }
2302

2303
        fd_iter->second.lvm_user_hidden = val;
×
2304

2305
        return true;
×
2306
    }
2307

2308
    std::map<intern_string_t, logline_value_meta> get_field_states() override
97✔
2309
    {
2310
        std::map<intern_string_t, logline_value_meta> retval;
97✔
2311

2312
        for (const auto& fd : FIELD_META) {
153✔
2313
            retval.emplace(fd.first, fd.second);
56✔
2314
        }
2315

2316
        return retval;
97✔
2317
    }
×
2318

2319
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
14✔
2320
    {
2321
        auto retval = std::make_shared<w3c_log_format>(*this);
14✔
2322

2323
        retval->lf_specialized = true;
14✔
2324
        return retval;
28✔
2325
    }
14✔
2326

2327
    class w3c_log_table : public log_format_vtab_impl {
2328
    public:
2329
        explicit w3c_log_table(std::shared_ptr<const log_format> format)
11✔
2330
            : log_format_vtab_impl(format)
11✔
2331
        {
2332
        }
11✔
2333

2334
        void get_columns(std::vector<vtab_column>& cols) const override
14✔
2335
        {
2336
            for (const auto& fd : get_known_fields()) {
238✔
2337
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
224✔
2338
                    fd.fd_meta.lvm_kind);
224✔
2339

2340
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
224✔
2341
                                  type_pair.first,
2342
                                  fd.fd_collator,
224✔
2343
                                  false,
448✔
2344
                                  "",
2345
                                  type_pair.second);
2346
            }
2347
            cols.emplace_back("x_fields");
14✔
2348
            cols.back().with_comment(
28✔
2349
                "A JSON-object that contains fields that are not first-class "
2350
                "columns");
2351
            for (const auto& fs : get_known_struct_fields()) {
70✔
2352
                cols.emplace_back(fs.fs_struct_name.to_string());
56✔
2353
            }
2354
        }
14✔
2355

2356
        void get_foreign_keys(
3✔
2357
            std::unordered_set<std::string>& keys_inout) const override
2358
        {
2359
            this->log_vtab_impl::get_foreign_keys(keys_inout);
3✔
2360

2361
            for (const auto& fd : get_known_fields()) {
51✔
2362
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
48✔
2363
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
30✔
2364
                }
2365
            }
2366
        }
3✔
2367
    };
2368

2369
    static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>&
2370
    get_tables()
11✔
2371
    {
2372
        static std::map<intern_string_t, std::shared_ptr<w3c_log_table>> retval;
11✔
2373

2374
        return retval;
11✔
2375
    }
2376

2377
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
824✔
2378
    {
2379
        if (this->wlf_format_name.empty()) {
824✔
2380
            return nullptr;
813✔
2381
        }
2382

2383
        std::shared_ptr<w3c_log_table> retval = nullptr;
11✔
2384

2385
        auto& tables = get_tables();
11✔
2386
        const auto iter = tables.find(this->wlf_format_name);
11✔
2387
        if (iter == tables.end()) {
11✔
2388
            retval = std::make_shared<w3c_log_table>(this->shared_from_this());
11✔
2389
            tables[this->wlf_format_name] = retval;
11✔
2390
        }
2391

2392
        return retval;
11✔
2393
    }
11✔
2394

2395
    void get_subline(const log_format_file_state& lffs,
1,549✔
2396
                     const logline& ll,
2397
                     shared_buffer_ref& sbr,
2398
                     subline_options opts) override
2399
    {
2400
    }
1,549✔
2401

2402
    // Date/time scanner owned by this format instance.
    // NOTE(review): not referenced in the visible part of the class;
    // presumably used by the line-scanning code -- confirm.
    date_time_scanner wlf_time_scanner;
2403
    // Name of the detected format.  Empty until a header has been parsed;
    // get_vtab_impl() uses it as the key into get_tables().
    intern_string_t wlf_format_name;
2404
    // Field definitions in column order; annotate() indexes this vector by
    // the position of each whitespace-separated column in a line.
    std::vector<field_def> wlf_field_defs;
2405
};
2406

2407
std::unordered_map<const intern_string_t, logline_value_meta>
2408
    w3c_log_format::FIELD_META;
2409

2410
// Interned name of the "date" column; annotate() and hide_field() compare
// field names against it.
const intern_string_t w3c_log_format::F_DATE = intern_string::lookup("date");
2411
// Interned name of the "time" column; annotate() and hide_field() compare
// field names against it.
const intern_string_t w3c_log_format::F_TIME = intern_string::lookup("time");
2412

2413
struct logfmt_pair_handler {
2414
    explicit logfmt_pair_handler(date_time_scanner& dts) : lph_dt_scanner(dts)
14,796✔
2415
    {
2416
    }
14,796✔
2417

2418
    log_format::scan_result_t process_value(const string_fragment& value_frag)
4,296✔
2419
    {
2420
        if (this->lph_key_frag.is_one_of(
4,296✔
2421
                "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
2422
        {
2423
            if (!this->lph_dt_scanner.scan(value_frag.data(),
49✔
2424
                                           value_frag.length(),
49✔
2425
                                           nullptr,
2426
                                           &this->lph_time_tm,
2427
                                           this->lph_tv))
49✔
2428
            {
2429
                return log_format::scan_no_match{
12✔
2430
                    "timestamp value did not parse correctly"};
12✔
2431
            }
2432
            char buf[1024];
2433
            this->lph_dt_scanner.ftime(
37✔
2434
                buf, sizeof(buf), nullptr, this->lph_time_tm);
37✔
2435
            this->lph_found_time += 1;
37✔
2436
        } else if (this->lph_key_frag.is_one_of("level"_frag, "lvl"_frag)) {
4,247✔
2437
            this->lph_level
2438
                = string2level(value_frag.data(), value_frag.length());
46✔
2439
        }
2440
        return log_format::scan_match{};
4,284✔
2441
    }
2442

2443
    date_time_scanner& lph_dt_scanner;
2444
    size_t lph_found_time{0};
2445
    exttm lph_time_tm;
2446
    timeval lph_tv{0, 0};
2447
    log_level_t lph_level{log_level_t::LEVEL_INFO};
2448
    string_fragment lph_key_frag{""};
2449
};
2450

2451
class logfmt_format : public log_format {
2452
public:
2453
    const intern_string_t get_name() const override
18,276✔
2454
    {
2455
        const static intern_string_t NAME = intern_string::lookup("logfmt_log");
20,132✔
2456

2457
        return NAME;
18,276✔
2458
    }
2459

2460
    class logfmt_log_table : public log_format_vtab_impl {
2461
    public:
2462
        logfmt_log_table(std::shared_ptr<const log_format> format)
813✔
2463
            : log_format_vtab_impl(format)
813✔
2464
        {
2465
        }
813✔
2466

2467
        void get_columns(std::vector<vtab_column>& cols) const override
814✔
2468
        {
2469
            static const auto FIELDS = std::string("fields");
2,440✔
2470

2471
            cols.emplace_back(FIELDS);
814✔
2472
        }
814✔
2473
    };
2474

2475
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
813✔
2476
    {
2477
        static auto retval
2478
            = std::make_shared<logfmt_log_table>(this->shared_from_this());
813✔
2479

2480
        return retval;
813✔
2481
    }
2482

2483
    scan_result_t scan(logfile& lf,
14,796✔
2484
                       std::vector<logline>& dst,
2485
                       const line_info& li,
2486
                       shared_buffer_ref& sbr,
2487
                       scan_batch_context& sbc) override
2488
    {
2489
        auto p = logfmt::parser(sbr.to_string_fragment());
14,796✔
2490
        scan_result_t retval = scan_no_match{};
14,796✔
2491
        bool done = false;
14,796✔
2492
        logfmt_pair_handler lph(this->lf_date_time);
14,796✔
2493

2494
        if (dst.size() == 1) {
14,796✔
2495
            auto file_options = lf.get_file_options();
1,464✔
2496

2497
            if (file_options) {
1,464✔
2498
                this->lf_date_time.dts_default_zone
2499
                    = file_options->second.fo_default_zone.pp_value;
60✔
2500
            } else {
2501
                this->lf_date_time.dts_default_zone = nullptr;
1,404✔
2502
            }
2503
        }
1,464✔
2504

2505
        while (!done) {
52,015✔
2506
            auto parse_result = p.step();
37,219✔
2507

2508
            auto value_res = parse_result.match(
2509
                [&done](const logfmt::parser::end_of_input&) -> scan_result_t {
×
2510
                    done = true;
14,388✔
2511
                    return scan_match{};
14,388✔
2512
                },
2513
                [](const string_fragment&) -> scan_result_t {
×
2514
                    return scan_incomplete{};
18,139✔
2515
                },
2516
                [&lph](const logfmt::parser::kvpair& kvp) -> scan_result_t {
×
2517
                    lph.lph_key_frag = kvp.first;
4,296✔
2518

2519
                    return kvp.second.match(
2520
                        [](const logfmt::parser::bool_value& bv)
×
2521
                            -> scan_result_t { return scan_match{}; },
×
2522
                        [&lph](const logfmt::parser::float_value& fv)
×
2523
                            -> scan_result_t {
2524
                            return lph.process_value(fv.fv_str_value);
5✔
2525
                        },
2526
                        [&lph](const logfmt::parser::int_value& iv)
×
2527
                            -> scan_result_t {
2528
                            return lph.process_value(iv.iv_str_value);
112✔
2529
                        },
2530
                        [&lph](const logfmt::parser::quoted_value& qv)
×
2531
                            -> scan_result_t {
2532
                            auto_mem<yajl_handle_t> handle(yajl_free);
353✔
2533
                            yajl_callbacks cb;
2534
                            scan_result_t retval;
353✔
2535

2536
                            memset(&cb, 0, sizeof(cb));
353✔
2537
                            handle = yajl_alloc(&cb, nullptr, &lph);
353✔
2538
                            cb.yajl_string = +[](void* ctx,
706✔
2539
                                                 const unsigned char* str,
2540
                                                 size_t len,
2541
                                                 yajl_string_props_t*) -> int {
2542
                                auto& lph = *((logfmt_pair_handler*) ctx);
353✔
2543
                                string_fragment value_frag{str, 0, (int) len};
353✔
2544

2545
                                auto value_res = lph.process_value(value_frag);
353✔
2546
                                return value_res.is<scan_match>();
706✔
2547
                            };
706✔
2548

2549
                            if (yajl_parse(
353✔
2550
                                    handle,
2551
                                    (const unsigned char*) qv.qv_value.data(),
353✔
2552
                                    qv.qv_value.length())
353✔
2553
                                    != yajl_status_ok
2554
                                || yajl_complete_parse(handle)
353✔
2555
                                    != yajl_status_ok)
2556
                            {
2557
                                log_debug("json parsing failed");
×
2558
                                string_fragment unq_frag{
2559
                                    qv.qv_value.sf_string,
×
2560
                                    qv.qv_value.sf_begin + 1,
×
2561
                                    qv.qv_value.sf_end - 1,
×
2562
                                };
2563

2564
                                return lph.process_value(unq_frag);
×
2565
                            }
2566

2567
                            return scan_match{};
353✔
2568
                        },
353✔
2569
                        [&lph](const logfmt::parser::unquoted_value& uv)
4,296✔
2570
                            -> scan_result_t {
2571
                            return lph.process_value(uv.uv_value);
3,826✔
2572
                        });
8,592✔
2573
                },
2574
                [](const logfmt::parser::error& err) -> scan_result_t {
×
2575
                    // log_error("logfmt parse error: %s", err.e_msg.c_str());
2576
                    return scan_no_match{};
396✔
2577
                });
37,219✔
2578
            if (value_res.is<scan_no_match>()) {
37,219✔
2579
                retval = value_res;
408✔
2580
                done = true;
408✔
2581
            }
2582
        }
37,219✔
2583

2584
        if (lph.lph_found_time == 1) {
14,796✔
2585
            this->lf_timestamp_flags = lph.lph_time_tm.et_flags;
37✔
2586
            auto& ll = dst.back();
37✔
2587
            ll.set_time(lph.lph_tv);
37✔
2588
            ll.set_level(lph.lph_level);
37✔
2589
            retval = scan_match{500};
37✔
2590
        }
2591

2592
        return retval;
29,592✔
2593
    }
×
2594

2595
    void annotate(logfile* lf,
16✔
2596
                  uint64_t line_number,
2597
                  string_attrs_t& sa,
2598
                  logline_value_vector& values) const override
2599
    {
2600
        static const intern_string_t FIELDS_NAME
2601
            = intern_string::lookup("fields");
22✔
2602

2603
        auto& sbr = values.lvv_sbr;
16✔
2604
        auto p = logfmt::parser(sbr.to_string_fragment());
16✔
2605
        auto done = false;
16✔
2606
        size_t found_body = 0;
16✔
2607

2608
        while (!done) {
133✔
2609
            auto parse_result = p.step();
117✔
2610

2611
            done = parse_result.match(
234✔
2612
                [](const logfmt::parser::end_of_input&) { return true; },
16✔
2613
                [](const string_fragment&) { return false; },
×
2614
                [this, &sa, &values, &found_body](
×
2615
                    const logfmt::parser::kvpair& kvp) {
2616
                    auto value_frag = kvp.second.match(
101✔
2617
                        [this, &kvp, &values](
×
2618
                            const logfmt::parser::bool_value& bv) {
2619
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2620
                                                              kvp.first),
×
2621
                                                          value_kind_t::
2622
                                                              VALUE_INTEGER,
2623
                                                          logline_value_meta::
2624
                                                              table_column{0},
×
2625
                                                          (log_format*) this}
×
2626
                                           .with_struct_name(FIELDS_NAME);
×
2627
                            values.lvv_values.emplace_back(lvm, bv.bv_value);
×
2628
                            values.lvv_values.back().lv_origin
×
2629
                                = to_line_range(bv.bv_str_value);
×
2630

2631
                            return bv.bv_str_value;
×
2632
                        },
×
2633
                        [this, &kvp, &values](
×
2634
                            const logfmt::parser::int_value& iv) {
2635
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2636
                                                              kvp.first),
×
2637
                                                          value_kind_t::
2638
                                                              VALUE_INTEGER,
2639
                                                          logline_value_meta::
2640
                                                              table_column{0},
×
2641
                                                          (log_format*) this}
×
2642
                                           .with_struct_name(FIELDS_NAME);
×
2643
                            values.lvv_values.emplace_back(lvm, iv.iv_value);
×
2644
                            values.lvv_values.back().lv_origin
×
2645
                                = to_line_range(iv.iv_str_value);
×
2646
                            return iv.iv_str_value;
×
2647
                        },
×
2648
                        [this, &kvp, &values](
101✔
2649
                            const logfmt::parser::float_value& fv) {
2650
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2651
                                                              kvp.first),
×
2652
                                                          value_kind_t::
2653
                                                              VALUE_INTEGER,
2654
                                                          logline_value_meta::
2655
                                                              table_column{0},
×
2656
                                                          (log_format*) this}
×
2657
                                           .with_struct_name(FIELDS_NAME);
×
2658
                            values.lvv_values.emplace_back(lvm, fv.fv_value);
×
2659
                            values.lvv_values.back().lv_origin
×
2660
                                = to_line_range(fv.fv_str_value);
×
2661

2662
                            return fv.fv_str_value;
×
2663
                        },
×
2664
                        [](const logfmt::parser::quoted_value& qv) {
×
2665
                            return qv.qv_value;
31✔
2666
                        },
2667
                        [](const logfmt::parser::unquoted_value& uv) {
×
2668
                            return uv.uv_value;
70✔
2669
                        });
2670
                    auto value_lr = to_line_range(value_frag);
101✔
2671

2672
                    auto known_field = false;
101✔
2673
                    if (kvp.first.is_one_of(
101✔
2674
                            "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
2675
                    {
2676
                        sa.emplace_back(value_lr, L_TIMESTAMP.value());
16✔
2677
                        known_field = true;
16✔
2678
                    } else if (kvp.first.is_one_of("level"_frag, "lvl"_frag)) {
85✔
2679
                        sa.emplace_back(value_lr, L_LEVEL.value());
16✔
2680
                        known_field = true;
16✔
2681
                    } else if (kvp.first.is_one_of("msg"_frag,
69✔
2682
                                                   "message"_frag)) {
2683
                        sa.emplace_back(value_lr, SA_BODY.value());
16✔
2684
                        found_body += 1;
16✔
2685
                    } else if (kvp.second.is<logfmt::parser::quoted_value>()
53✔
2686
                               || kvp.second
102✔
2687
                                      .is<logfmt::parser::unquoted_value>())
49✔
2688
                    {
2689
                        auto vkind = value_frag.startswith("\"")
53✔
2690
                            ? value_kind_t::VALUE_JSON
53✔
2691
                            : value_kind_t::VALUE_TEXT;
53✔
2692
                        auto lvm = logline_value_meta{
2693
                            intern_string::lookup(kvp.first),
53✔
2694
                            vkind,
2695
                            logline_value_meta::table_column{0},
×
2696
                            (log_format*) this,
×
2697
                        };
53✔
2698
                        lvm.with_struct_name(FIELDS_NAME);
53✔
2699
                        values.lvv_values.emplace_back(lvm, value_frag);
53✔
2700
                        values.lvv_values.back().lv_origin = value_lr;
53✔
2701
                    }
53✔
2702
                    if (known_field) {
101✔
2703
                        auto key_with_eq = kvp.first;
32✔
2704
                        key_with_eq.sf_end += 1;
32✔
2705
                        sa.emplace_back(to_line_range(key_with_eq),
32✔
2706
                                        SA_REPLACED.value());
64✔
2707
                    } else {
2708
                        sa.emplace_back(to_line_range(kvp.first),
69✔
2709
                                        VC_ROLE.value(role_t::VCR_OBJECT_KEY));
138✔
2710
                    }
2711
                    return false;
101✔
2712
                },
2713
                [line_number, &sbr](const logfmt::parser::error& err) {
117✔
2714
                    log_error(
×
2715
                        "bad line %.*s", (int) sbr.length(), sbr.get_data());
2716
                    log_error("%lld:logfmt parse error: %s",
×
2717
                              line_number,
2718
                              err.e_msg.c_str());
2719
                    return true;
×
2720
                });
2721
        }
117✔
2722

2723
        if (found_body == 1) {
16✔
2724
            sa.emplace_back(line_range::empty_at(sbr.length()),
16✔
2725
                            SA_BODY.value());
32✔
2726
        }
2727

2728
        log_format::annotate(lf, line_number, sa, values);
16✔
2729
    }
16✔
2730

2731
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
2732
    {
2733
        auto retval = std::make_shared<logfmt_format>(*this);
6✔
2734

2735
        retval->lf_specialized = true;
6✔
2736
        return retval;
12✔
2737
    }
6✔
2738
};
2739

2740
static auto format_binder = injector::bind_multiple<log_format>()
2741
                                .add<logfmt_format>()
2742
                                .add<bro_log_format>()
2743
                                .add<w3c_log_format>()
2744
                                .add<metrics_log_format>()
2745
                                .add<o1_generic_log_format>()
2746
                                .add<piper_log_format>();
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc