• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 25032978804-3004

27 Apr 2026 09:10PM UTC coverage: 69.234% (-0.003%) from 69.237%
25032978804-3004

push

github

tstack
[ansi_scrubber] inform the user of unsupported escape sequences

81 of 121 new or added lines in 10 files covered. (66.94%)

1 existing line in 1 file now uncovered.

54057 of 78079 relevant lines covered (69.23%)

568296.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.63
/src/log_format_impls.cc
1
/**
2
 * Copyright (c) 2007-2017, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file log_format_impls.cc
30
 */
31

32
#include <algorithm>
33
#include <chrono>
34
#include <memory>
35
#include <utility>
36

37
#include "log_format.hh"
38

39
#include <stdio.h>
40

41
#include "base/injector.bind.hh"
42
#include "base/opt_util.hh"
43
#include "base/string_attr_type.hh"
44
#include "config.h"
45
#include "formats/logfmt/logfmt.parser.hh"
46
#include "log_vtab_impl.hh"
47
#include "ptimec.hh"
48
#include "scn/scan.h"
49
#include "sql_util.hh"
50
#include "yajlpp/yajlpp.hh"
51

52
using std::string_literals::operator""s;
53

54
class piper_log_format : public log_format {
55
public:
56
    const intern_string_t get_name() const override
16,198✔
57
    {
58
        static const intern_string_t RETVAL
59
            = intern_string::lookup("lnav_piper_log");
17,852✔
60

61
        return RETVAL;
16,198✔
62
    }
63

64
    scan_result_t scan(logfile& lf,
13,061✔
65
                       std::vector<logline>& dst,
66
                       const line_info& li,
67
                       shared_buffer_ref& sbr,
68
                       scan_batch_context& sbc) override
69
    {
70
        if (lf.has_line_metadata()
13,061✔
71
            && lf.get_text_format() == text_format_t::TF_LOG)
13,061✔
72
        {
73
            dst.emplace_back(li.li_file_range.fr_offset,
291✔
74
                             to_us(li.li_timestamp),
582✔
75
                             li.li_level);
291✔
76
            return scan_match{1};
291✔
77
        }
78

79
        return scan_no_match{"not a piper capture"};
12,770✔
80
    }
81

82
    static constexpr int TIMESTAMP_SIZE = 28;
83

84
    void annotate(logfile* lf,
59✔
85
                  uint64_t line_number,
86
                  string_attrs_t& sa,
87
                  logline_value_vector& values) const override
88
    {
89
        auto lr = line_range{0, TIMESTAMP_SIZE};
59✔
90
        sa.emplace_back(lr, L_TIMESTAMP.value());
59✔
91
        log_format::annotate(lf, line_number, sa, values);
59✔
92
    }
59✔
93

94
    void get_subline(const log_format_file_state& lffs,
310✔
95
                     const logline& ll,
96
                     shared_buffer_ref& sbr,
97
                     subline_options opts) override
98
    {
99
        this->plf_cached_line.resize(TIMESTAMP_SIZE);
310✔
100
        auto tlen = sql_strftime(this->plf_cached_line.data(),
310✔
101
                                 this->plf_cached_line.size(),
102
                                 ll.get_timeval(),
310✔
103
                                 'T');
104
        this->plf_cached_line.resize(tlen);
310✔
105
        {
106
            char zone_str[16];
107
            exttm tmptm;
310✔
108

109
            tmptm.et_flags |= ETF_ZONE_SET;
310✔
110
            tmptm.et_gmtoff
111
                = lnav::local_time_to_info(
620✔
112
                      date::local_seconds{ll.get_time<std::chrono::seconds>()})
310✔
113
                      .first.offset.count();
310✔
114
            off_t zone_len = 0;
310✔
115
            ftime_z(zone_str, zone_len, sizeof(zone_str), tmptm);
310✔
116
            for (off_t lpc = 0; lpc < zone_len; lpc++) {
1,860✔
117
                this->plf_cached_line.push_back(zone_str[lpc]);
1,550✔
118
            }
119
        }
120
        this->plf_cached_line.push_back(' ');
310✔
121
        const auto prefix_len = this->plf_cached_line.size();
310✔
122
        this->plf_cached_line.resize(this->plf_cached_line.size()
620✔
123
                                     + sbr.length());
310✔
124
        memcpy(
310✔
125
            &this->plf_cached_line[prefix_len], sbr.get_data(), sbr.length());
310✔
126

127
        sbr.share(this->plf_share_manager,
620✔
128
                  this->plf_cached_line.data(),
310✔
129
                  this->plf_cached_line.size());
130
    }
310✔
131

132
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
133
    {
134
        auto retval = std::make_shared<piper_log_format>(*this);
6✔
135

136
        retval->lf_specialized = true;
6✔
137
        retval->lf_timestamp_flags |= ETF_ZONE_SET | ETF_MICROS_SET;
6✔
138
        return retval;
12✔
139
    }
6✔
140

141
private:
142
    shared_buffer plf_share_manager;
143
    std::vector<char> plf_cached_line;
144
};
145

146
class generic_log_format : public log_format {
147
public:
148
    static const pcre_format* get_pcre_log_formats()
13,131✔
149
    {
150
        static const pcre_format log_fmt[] = {
151
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24}))"),
152
            pcre_format(
153
                R"((?x)^
154
  (?:\*\*\*\s+)?                              # optional "*** " prefix
155
  (?<timestamp>
156
      (?:
157
          \s
158
        | \d{4}[\-\/]\d{2}[\-\/]\d{2}         # YYYY-MM-DD or YYYY/MM/DD
159
        | T                                   # ISO date/time separator
160
        | \d{1,2}:\d{2}(?::\d{2}(?:[\.,]\d{1,9})?)?   # HH:MM[:SS[.frac]]
161
        | Z                                   # UTC zulu marker
162
        | [+\-]\d{2}:?\d{2}                   # timezone offset, +0500 or +05:00
163
        | (?!DBG|DEBUG|ERR|INFO|WARN|NONE)    # ...not one of these levels
164
          [A-Z]{3,4}                          # 3-4 uppercase letters (e.g. month/tz abbrev)
165
      )+
166
  )
167
  [:|\s]?                                     # optional separator
168
  (trc|trace|dbg|debug|info|warn(?:ing)?|err(?:or)?)   # log level
169
  [:|\s]                                      # separator
170
  \s*
171
)"),
172
            pcre_format(
173
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) \[(trace|debug|info|warn(?:ing)?|error|critical)\]\s+)"),
174
            pcre_format(
175
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) -- (trace|debug|info|warn(?:ing)?|error|critical) --\s+)"),
176

177
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+/\.-]+) \[\w\s+)"),
178
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+)\s+)"),
179
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+) -\s+)"),
180
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+) -\s+)"),
181
            pcre_format(
182
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\] \[(trace|debug|info|warn(?:ing)?|error|critical)\]\s+)"),
183
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: "
184
                        "\\.,/-]+)\\[[^\\]]+\\]\\s+"),
185
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+)\s+)"),
186

187
            pcre_format(
188
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s*(\w+):?\s+)"),
189
            pcre_format(
190
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s+)"),
191
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: "
192
                        "\\.,+/-]+)\\] \\w+\\s+"),
193
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: ,+/-]+)\\] "
194
                        "\\(\\d+\\)\\s+"),
195

196
            pcre_format(),
197
        };
13,131✔
198

199
        return log_fmt;
13,131✔
200
    }
201

202
    std::string get_pattern_regex(const pattern_locks& pl,
×
203
                                  uint64_t line_number) const override
204
    {
205
        auto pat_index = pl.pattern_index_for_line(line_number);
×
206
        return get_pcre_log_formats()[pat_index].name;
×
207
    }
208

209
    const intern_string_t get_name() const override
15,852✔
210
    {
211
        static const intern_string_t RETVAL
212
            = intern_string::lookup("generic_log");
17,506✔
213

214
        return RETVAL;
15,852✔
215
    }
216

217
    scan_result_t scan(logfile& lf,
13,024✔
218
                       std::vector<logline>& dst,
219
                       const line_info& li,
220
                       shared_buffer_ref& sbr,
221
                       scan_batch_context& sbc) override
222
    {
223
        exttm log_time;
13,024✔
224
        timeval log_tv;
225
        string_fragment ts;
13,024✔
226
        std::optional<string_fragment> level;
13,024✔
227
        const char* last_pos;
228

229
        if (dst.empty()) {
13,024✔
230
            auto file_options = lf.get_file_options();
221✔
231

232
            if (file_options) {
221✔
233
                this->lf_date_time.dts_default_zone
234
                    = file_options->second.fo_default_zone.pp_value;
2✔
235
            } else {
236
                this->lf_date_time.dts_default_zone = nullptr;
219✔
237
            }
238
        }
221✔
239

240
        if ((last_pos = this->log_scanf(sbc,
26,048✔
241
                                        dst.size(),
13,024✔
242
                                        sbr.to_string_fragment(),
243
                                        get_pcre_log_formats(),
244
                                        nullptr,
245
                                        &log_time,
246
                                        &log_tv,
247

248
                                        &ts,
249
                                        &level))
250
            != nullptr)
13,024✔
251
        {
252
            auto level_val = log_level_t::LEVEL_UNKNOWN;
2,629✔
253
            if (level) {
2,629✔
254
                level_val = string2level(level->data(), level->length());
2,629✔
255
            }
256

257
            if (!((log_time.et_flags & ETF_DAY_SET)
2,629✔
258
                  && (log_time.et_flags & ETF_MONTH_SET)
2,554✔
259
                  && (log_time.et_flags & ETF_YEAR_SET)))
2,554✔
260
            {
261
                this->check_for_new_year(dst, log_time, log_tv);
898✔
262
            }
263

264
            if (!(this->lf_timestamp_flags
5,258✔
265
                  & (ETF_MILLIS_SET | ETF_MICROS_SET | ETF_NANOS_SET))
2,629✔
266
                && !dst.empty()
2,283✔
267
                && dst.back().get_time<std::chrono::seconds>().count()
2,275✔
268
                    == log_tv.tv_sec
2,275✔
269
                && dst.back()
5,986✔
270
                        .get_subsecond_time<std::chrono::microseconds>()
3,703✔
271
                        .count()
1,074✔
272
                    != 0)
273
            {
274
                auto log_ms
275
                    = dst.back()
×
276
                          .get_subsecond_time<std::chrono::microseconds>();
×
277

278
                log_time.et_nsec
279
                    = std::chrono::duration_cast<std::chrono::nanoseconds>(
×
280
                          log_ms)
281
                          .count();
×
282
                log_tv.tv_usec
283
                    = std::chrono::duration_cast<std::chrono::microseconds>(
×
284
                          log_ms)
285
                          .count();
×
286
            }
287

288
            auto log_us = to_us(log_tv);
2,629✔
289
            auto tid_iter = sbc.sbc_tids.insert_tid(
2,629✔
290
                sbc.sbc_allocator, string_fragment{}, log_us);
×
291
            tid_iter->second.titr_level_stats.update_msg_count(level_val);
2,629✔
292
            dst.emplace_back(li.li_file_range.fr_offset, log_us, level_val);
2,629✔
293
            return scan_match{5};
2,629✔
294
        }
295

296
        return scan_no_match{"no patterns matched"};
10,395✔
297
    }
298

299
    void annotate(logfile* lf,
107✔
300
                  uint64_t line_number,
301
                  string_attrs_t& sa,
302
                  logline_value_vector& values) const override
303
    {
304
        thread_local auto md = lnav::pcre2pp::match_data::unitialized();
107✔
305
        auto lffs = lf->get_format_file_state();
107✔
306
        auto& line = values.lvv_sbr;
107✔
307
        int pat_index
308
            = lffs.lffs_pattern_locks.pattern_index_for_line(line_number);
107✔
309
        const auto& fmt = get_pcre_log_formats()[pat_index];
107✔
310
        const auto line_sf = line.to_string_fragment();
107✔
311
        auto match_res = fmt.pcre->capture_from(line_sf)
107✔
312
                             .into(md)
107✔
313
                             .matches(PCRE2_NO_UTF_CHECK)
214✔
314
                             .ignore_error();
107✔
315
        if (!match_res) {
107✔
316
            return;
13✔
317
        }
318

319
        int prefix_len = md.remaining().sf_begin;
94✔
320
        auto ts_cap = md[fmt.pf_timestamp_index].value();
94✔
321
        auto lr = to_line_range(ts_cap.trim());
94✔
322
        auto level_cap = md[2];
94✔
323

324
        if (!level_cap) {
94✔
325
            lr.lr_end = prefix_len = lr.lr_start + this->lf_date_time.dts_fmt_len;
21✔
326
        }
327
        sa.emplace_back(lr, L_TIMESTAMP.value());
94✔
328

329
        values.lvv_values.emplace_back(TS_META, line, lr);
94✔
330
        values.lvv_values.back().lv_meta.lvm_format = (log_format*) this;
94✔
331

332
        if (level_cap) {
94✔
333
            if (string2level(level_cap->data(), level_cap->length(), true)
73✔
334
                != LEVEL_UNKNOWN)
73✔
335
            {
336
                values.lvv_values.emplace_back(
73✔
337
                    LEVEL_META, line, to_line_range(level_cap->trim()));
73✔
338
                values.lvv_values.back().lv_meta.lvm_format
73✔
339
                    = (log_format*) this;
73✔
340

341
                lr = to_line_range(level_cap->trim());
73✔
342
                if (lr.lr_end != (ssize_t) line.length()) {
73✔
343
                    sa.emplace_back(lr, L_LEVEL.value());
73✔
344
                }
345
            }
346
        }
347

348
        lr.lr_start = 0;
94✔
349
        lr.lr_end = prefix_len;
94✔
350
        sa.emplace_back(lr, L_PREFIX.value());
94✔
351

352
        lr.lr_start = prefix_len;
94✔
353
        lr.lr_end = line.length();
94✔
354
        sa.emplace_back(lr, SA_BODY.value());
94✔
355

356
        log_format::annotate(lf, line_number, sa, values);
94✔
357
    }
358

359
    std::shared_ptr<log_format> specialized(int fmt_lock) override
56✔
360
    {
361
        auto retval = std::make_shared<generic_log_format>(*this);
56✔
362

363
        retval->lf_specialized = true;
56✔
364
        return retval;
112✔
365
    }
56✔
366

367
    bool hide_field(const intern_string_t field_name, bool val) override
2✔
368
    {
369
        if (field_name == TS_META.lvm_name) {
2✔
370
            TS_META.lvm_user_hidden = val;
1✔
371
            return true;
1✔
372
        }
373
        if (field_name == LEVEL_META.lvm_name) {
1✔
374
            LEVEL_META.lvm_user_hidden = val;
1✔
375
            return true;
1✔
376
        }
377
        if (field_name == OPID_META.lvm_name) {
×
378
            OPID_META.lvm_user_hidden = val;
×
379
            return true;
×
380
        }
381
        return false;
×
382
    }
383

384
    std::map<intern_string_t, logline_value_meta> get_field_states() override
65✔
385
    {
386
        return {
387
            {TS_META.lvm_name, TS_META},
388
            {LEVEL_META.lvm_name, LEVEL_META},
389
            {OPID_META.lvm_name, OPID_META},
390
        };
325✔
391
    }
65✔
392

393
private:
394
    static logline_value_meta TS_META;
395
    static logline_value_meta LEVEL_META;
396
    static logline_value_meta OPID_META;
397
};
398

399
// Metadata for the timestamp value emitted by generic_log_format::annotate().
logline_value_meta generic_log_format::TS_META = logline_value_meta{
    intern_string::lookup("log_time"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{2},
};

// Metadata for the level value emitted by generic_log_format::annotate().
logline_value_meta generic_log_format::LEVEL_META = logline_value_meta{
    intern_string::lookup("log_level"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{3},
};

// Metadata for the "log_opid" field; declared as an internal column.
logline_value_meta generic_log_format::OPID_META = logline_value_meta{
    intern_string::lookup("log_opid"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::internal_column{},
};
416

417
/**
 * Decode "\xHH" escapes in a length-delimited buffer.
 *
 * Every backslash followed by 'x' and exactly two hexadecimal digits is
 * replaced by the byte with that value.  Any other backslash is dropped and
 * the characters after it are copied through unchanged.
 *
 * The buffer is never read at or beyond str + len, so it does not need to
 * be NUL-terminated.
 *
 * @param str The text to decode.
 * @param len The number of bytes available at str.
 * @return The decoded string.
 */
std::string
from_escaped_string(const char* str, size_t len)
{
    // Value of a single hex digit, or -1 if ch is not a hex digit.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    std::string retval;

    retval.reserve(len);
    for (size_t lpc = 0; lpc < len; lpc++) {
        if (str[lpc] != '\\') {
            retval.push_back(str[lpc]);
            continue;
        }

        // A full escape needs four bytes: '\\', 'x' and two hex digits.
        if ((lpc + 3) < len && str[lpc + 1] == 'x') {
            const int hi = hex_value(str[lpc + 2]);
            const int lo = hex_value(str[lpc + 3]);

            if (hi >= 0 && lo >= 0) {
                retval.push_back(static_cast<char>((hi << 4) | lo));
                // Skip the 'x' and both digits; the loop increment then
                // moves past the last digit.
                lpc += 3;
            }
        }
        // Otherwise only the backslash itself is skipped.
    }

    return retval;
}
×
442

443
/**
 * Find the first occurrence of the NUL-terminated string `find` within the
 * first `slen` bytes of `s`.  The search also stops at a NUL byte in `s`.
 *
 * @param s The text to search.
 * @param find The NUL-terminated needle.  An empty needle matches at `s`.
 * @param slen The maximum number of bytes of `s` to examine.
 * @return A pointer to the start of the match, or std::nullopt if there is
 * no match.
 */
std::optional<const char*>
lnav_strnstr(const char* s, const char* find, size_t slen)
{
    const char first = *find;

    if (first == '\0') {
        return s;
    }

    const char* const rest = find + 1;
    const size_t rest_len = strlen(rest);

    for (; slen >= 1 && *s != '\0'; ++s, --slen) {
        if (*s != first) {
            continue;
        }

        // Bytes available after the candidate's first character.
        const size_t remaining = slen - 1;

        if (rest_len > remaining) {
            // The tail of the needle cannot fit here or anywhere later.
            return std::nullopt;
        }
        if (strncmp(s + 1, rest, rest_len) == 0) {
            return s;
        }
    }

    return std::nullopt;
}
467

468
struct separated_string {
469
    const char* ss_str;
470
    size_t ss_len;
471
    const char* ss_separator;
472
    size_t ss_separator_len;
473

474
    separated_string(const char* str, size_t len)
35,383✔
475
        : ss_str(str), ss_len(len), ss_separator(","),
35,383✔
476
          ss_separator_len(strlen(this->ss_separator))
35,383✔
477
    {
478
    }
35,383✔
479

480
    separated_string& with_separator(const char* sep)
35,383✔
481
    {
482
        this->ss_separator = sep;
35,383✔
483
        this->ss_separator_len = strlen(sep);
35,383✔
484
        return *this;
35,383✔
485
    }
486

487
    struct iterator {
488
        const separated_string& i_parent;
489
        const char* i_pos;
490
        const char* i_next_pos;
491
        size_t i_index;
492

493
        iterator(const separated_string& ss, const char* pos)
861,538✔
494
            : i_parent(ss), i_pos(pos), i_next_pos(pos), i_index(0)
861,538✔
495
        {
496
            this->update();
861,538✔
497
        }
861,538✔
498

499
        void update()
1,652,478✔
500
        {
501
            const separated_string& ss = this->i_parent;
1,652,478✔
502
            auto next_field
503
                = lnav_strnstr(this->i_pos,
1,652,478✔
504
                               ss.ss_separator,
1,652,478✔
505
                               ss.ss_len - (this->i_pos - ss.ss_str));
1,652,478✔
506
            if (next_field) {
1,652,478✔
507
                this->i_next_pos = next_field.value() + ss.ss_separator_len;
755,937✔
508
            } else {
509
                this->i_next_pos = ss.ss_str + ss.ss_len;
896,541✔
510
            }
511
        }
1,652,478✔
512

513
        iterator& operator++()
790,940✔
514
        {
515
            this->i_pos = this->i_next_pos;
790,940✔
516
            this->update();
790,940✔
517
            this->i_index += 1;
790,940✔
518

519
            return *this;
790,940✔
520
        }
521

522
        string_fragment operator*()
701,338✔
523
        {
524
            const auto& ss = this->i_parent;
701,338✔
525
            int end;
526

527
            if (this->i_next_pos < (ss.ss_str + ss.ss_len)) {
701,338✔
528
                end = this->i_next_pos - ss.ss_str - ss.ss_separator_len;
671,043✔
529
            } else {
530
                end = this->i_next_pos - ss.ss_str;
30,295✔
531
            }
532
            return string_fragment::from_byte_range(
701,338✔
533
                ss.ss_str, this->i_pos - ss.ss_str, end);
701,338✔
534
        }
535

536
        bool operator==(const iterator& other) const
826,155✔
537
        {
538
            return (&this->i_parent == &other.i_parent)
826,155✔
539
                && (this->i_pos == other.i_pos);
826,155✔
540
        }
541

542
        bool operator!=(const iterator& other) const
825,987✔
543
        {
544
            return !(*this == other);
825,987✔
545
        }
546

547
        size_t index() const { return this->i_index; }
1,718,319✔
548
    };
549

550
    iterator begin() { return {*this, this->ss_str}; }
35,383✔
551

552
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
826,155✔
553
};
554

555
class bro_log_format : public log_format {
556
public:
557
    static const intern_string_t TS;
558
    static const intern_string_t DURATION;
559
    struct field_def {
560
        logline_value_meta fd_meta;
561
        logline_value_meta* fd_root_meta;
562
        std::string fd_collator;
563
        std::optional<size_t> fd_numeric_index;
564

565
        explicit field_def(const intern_string_t name,
680✔
566
                           size_t col,
567
                           log_format* format)
568
            : fd_meta(name,
1,360✔
569
                      value_kind_t::VALUE_TEXT,
570
                      logline_value_meta::table_column{col},
680✔
571
                      format),
572
              fd_root_meta(&FIELD_META.find(name)->second)
680✔
573
        {
574
        }
680✔
575

576
        field_def& with_kind(value_kind_t kind,
500✔
577
                             bool identifier = false,
578
                             bool foreign_key = false,
579
                             const std::string& collator = "")
580
        {
581
            this->fd_meta.lvm_kind = kind;
500✔
582
            this->fd_meta.lvm_identifier = identifier;
500✔
583
            this->fd_meta.lvm_foreign_key = foreign_key;
500✔
584
            this->fd_collator = collator;
500✔
585
            return *this;
500✔
586
        }
587

588
        field_def& with_numeric_index(size_t index)
126✔
589
        {
590
            this->fd_numeric_index = index;
126✔
591
            return *this;
126✔
592
        }
593
    };
594

595
    static std::unordered_map<const intern_string_t, logline_value_meta>
596
        FIELD_META;
597

598
    static const intern_string_t get_opid_desc()
833✔
599
    {
600
        static const intern_string_t RETVAL = intern_string::lookup("std");
2,499✔
601

602
        return RETVAL;
833✔
603
    }
604

605
    bro_log_format()
833✔
606
    {
833✔
607
        this->lf_structured = true;
833✔
608
        this->lf_is_self_describing = true;
833✔
609
        this->lf_time_ordered = false;
833✔
610
        this->lf_timestamp_point_of_reference
611
            = timestamp_point_of_reference_t::start;
833✔
612

613
        auto desc_v = std::make_shared<std::vector<opid_descriptor>>();
833✔
614
        desc_v->emplace({});
833✔
615
        auto emplace_res = this->lf_opid_description_def->emplace(
1,666✔
616
            get_opid_desc(), opid_descriptors{{}, desc_v, 0});
1,666✔
617
        this->lf_opid_description_def_vec->emplace_back(
833✔
618
            &emplace_res.first->second);
833✔
619
    }
833✔
620

621
    const intern_string_t get_name() const override
122,296✔
622
    {
623
        static const intern_string_t name(intern_string::lookup("bro"));
123,950✔
624

625
        return this->blf_format_name.empty() ? name : this->blf_format_name;
122,296✔
626
    }
627

628
    void clear() override
13,085✔
629
    {
630
        this->log_format::clear();
13,085✔
631
        this->blf_format_name.clear();
13,085✔
632
        this->blf_field_defs.clear();
13,085✔
633
    }
13,085✔
634

635
    std::vector<logline_value_meta> get_value_metadata() const override
×
636
    {
637
        std::vector<logline_value_meta> retval;
×
638

639
        for (const auto& fd : this->blf_field_defs) {
×
640
            retval.emplace_back(fd.fd_meta);
×
641
        }
642
        return retval;
×
643
    }
×
644

645
    scan_result_t scan_int(std::vector<logline>& dst,
4,857✔
646
                           const line_info& li,
647
                           shared_buffer_ref& sbr,
648
                           scan_batch_context& sbc)
649
    {
650
        static const intern_string_t STATUS_CODE
651
            = intern_string::lookup("bro_status_code");
4,903✔
652
        static const intern_string_t UID = intern_string::lookup("bro_uid");
4,903✔
653
        static const intern_string_t ID_ORIG_H
654
            = intern_string::lookup("bro_id_orig_h");
4,903✔
655

656
        separated_string ss(sbr.get_data(), sbr.length());
4,857✔
657
        timeval tv;
658
        exttm tm;
4,857✔
659
        size_t found_ts = 0;
4,857✔
660
        log_level_t level = LEVEL_INFO;
4,857✔
661
        uint64_t opid_bloom = 0;
4,857✔
662
        auto opid_cap = string_fragment::invalid();
4,857✔
663
        auto host_cap = string_fragment::invalid();
4,857✔
664
        auto duration = std::chrono::microseconds{0};
4,857✔
665

666
        sbc.sbc_value_stats.resize(this->blf_field_defs.size());
4,857✔
667
        ss.with_separator(this->blf_separator.get());
4,857✔
668

669
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
143,439✔
670
            if (iter.index() == 0 && *iter == "#close") {
138,608✔
671
                return scan_match{2000};
26✔
672
            }
673

674
            if (iter.index() >= this->blf_field_defs.size()) {
138,582✔
675
                break;
×
676
            }
677

678
            const auto& fd = this->blf_field_defs[iter.index()];
138,582✔
679

680
            if (TS == fd.fd_meta.lvm_name) {
138,582✔
681
                static const char* const TIME_FMT[] = {"%s.%f"};
682
                const auto sf = *iter;
4,830✔
683

684
                if (this->lf_date_time.scan(
4,830✔
685
                        sf.data(), sf.length(), TIME_FMT, &tm, tv))
4,830✔
686
                {
687
                    this->lf_timestamp_flags = tm.et_flags;
4,830✔
688
                    found_ts += 1;
4,830✔
689
                }
690
            } else if (STATUS_CODE == fd.fd_meta.lvm_name) {
133,752✔
691
                const auto sf = *iter;
4,644✔
692

693
                if (!sf.empty() && sf[0] >= '4') {
4,644✔
694
                    level = LEVEL_ERROR;
23✔
695
                }
696
            } else if (UID == fd.fd_meta.lvm_name) {
129,108✔
697
                opid_cap = *iter;
4,830✔
698

699
                opid_bloom = opid_cap.bloom_bits();
4,830✔
700
            } else if (ID_ORIG_H == fd.fd_meta.lvm_name) {
124,278✔
701
                host_cap = *iter;
4,830✔
702
            } else if (DURATION == fd.fd_meta.lvm_name) {
119,448✔
703
                const auto sf = *iter;
186✔
704
                auto scan_res = scn::scan<double>("{}", sf.to_string_view());
186✔
705
                if (scan_res) {
186✔
706
                    duration = std::chrono::microseconds{
×
707
                        static_cast<long long>(scan_res->value() * 1000000)};
708
                }
709
            }
710

711
            if (fd.fd_numeric_index) {
138,582✔
712
                switch (fd.fd_meta.lvm_kind) {
24,708✔
713
                    case value_kind_t::VALUE_INTEGER:
24,708✔
714
                    case value_kind_t::VALUE_FLOAT: {
715
                        const auto sv = (*iter).to_string_view();
24,708✔
716
                        auto scan_float_res = scn::scan_value<double>(sv);
24,708✔
717
                        if (scan_float_res) {
24,708✔
718
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
20,064✔
719
                                .add_value(scan_float_res->value());
20,064✔
720
                        }
721
                        break;
24,708✔
722
                    }
723
                    default:
×
724
                        break;
×
725
                }
726
            }
727
        }
728

729
        if (found_ts == 1) {
4,831✔
730
            if (!this->lf_specialized) {
4,830✔
731
                for (auto& ll : dst) {
216✔
732
                    ll.set_ignore(true);
192✔
733
                }
734
            }
735

736
            auto log_us = to_us(tv);
4,830✔
737
            if (opid_cap.is_valid()) {
4,830✔
738
                auto opid_iter = sbc.sbc_opids.insert_op(
4,830✔
739
                    sbc.sbc_allocator,
740
                    opid_cap,
741
                    log_us,
742
                    this->lf_timestamp_point_of_reference,
743
                    duration);
744
                opid_iter->second.otr_level_stats.update_msg_count(level);
4,830✔
745

746
                auto& otr = opid_iter->second;
4,830✔
747
                if (!otr.otr_description.lod_index && host_cap.is_valid()
7,039✔
748
                    && otr.otr_description.lod_elements.empty())
7,039✔
749
                {
750
                    otr.otr_description.lod_index = 0;
2,209✔
751
                    otr.otr_description.lod_elements.insert(
4,418✔
752
                        0, host_cap.to_string());
2,209✔
753
                }
754
            }
755
            dst.emplace_back(li.li_file_range.fr_offset, log_us, level);
4,830✔
756
            dst.back().merge_bloom_bits(opid_bloom);
4,830✔
757
            return scan_match{2000};
4,830✔
758
        }
759
        return scan_no_match{"no header found"};
1✔
760
    }
761

762
    scan_result_t scan(logfile& lf,
13,155✔
763
                       std::vector<logline>& dst,
764
                       const line_info& li,
765
                       shared_buffer_ref& sbr,
766
                       scan_batch_context& sbc) override
767
    {
768
        static const auto SEP_RE
769
            = lnav::pcre2pp::code::from_const(R"(^#separator\s+(.+))");
13,155✔
770

771
        if (dst.empty()) {
13,155✔
772
            auto file_options = lf.get_file_options();
1,377✔
773

774
            if (file_options) {
1,377✔
775
                this->lf_date_time.dts_default_zone
776
                    = file_options->second.fo_default_zone.pp_value;
57✔
777
            } else {
778
                this->lf_date_time.dts_default_zone = nullptr;
1,320✔
779
            }
780
        }
1,377✔
781

782
        if (!this->blf_format_name.empty()) {
13,155✔
783
            return this->scan_int(dst, li, sbr, sbc);
4,833✔
784
        }
785

786
        if (dst.size() <= 2 || dst.size() > 20 || sbr.empty()
14,161✔
787
            || sbr.get_data()[0] == '#')
14,161✔
788
        {
789
            return scan_no_match{"no header found"};
6,433✔
790
        }
791

792
        auto line_iter = dst.begin();
1,889✔
793
        auto read_result = lf.read_line(line_iter);
1,889✔
794

795
        if (read_result.isErr()) {
1,889✔
796
            return scan_no_match{"unable to read first line"};
×
797
        }
798

799
        auto line = read_result.unwrap();
1,889✔
800
        auto md = SEP_RE.create_match_data();
1,889✔
801

802
        auto match_res = SEP_RE.capture_from(line.to_string_fragment())
1,889✔
803
                             .into(md)
1,889✔
804
                             .matches(PCRE2_NO_UTF_CHECK)
3,778✔
805
                             .ignore_error();
1,889✔
806
        if (!match_res) {
1,889✔
807
            return scan_no_match{"cannot read separator header"};
1,865✔
808
        }
809

810
        this->clear();
24✔
811

812
        auto sep = from_escaped_string(md[1]->data(), md[1]->length());
24✔
813
        this->blf_separator = intern_string::lookup(sep);
24✔
814

815
        for (++line_iter; line_iter != dst.end(); ++line_iter) {
192✔
816
            auto next_read_result = lf.read_line(line_iter);
168✔
817

818
            if (next_read_result.isErr()) {
168✔
819
                return scan_no_match{"unable to read header line"};
×
820
            }
821

822
            line = next_read_result.unwrap();
168✔
823
            separated_string ss(line.get_data(), line.length());
168✔
824

825
            ss.with_separator(this->blf_separator.get());
168✔
826
            auto iter = ss.begin();
168✔
827

828
            string_fragment directive = *iter;
168✔
829

830
            if (directive.empty() || directive[0] != '#') {
168✔
831
                continue;
×
832
            }
833

834
            ++iter;
168✔
835
            if (iter == ss.end()) {
168✔
836
                continue;
×
837
            }
838

839
            if (directive == "#set_separator") {
168✔
840
                this->blf_set_separator = intern_string::lookup(*iter);
24✔
841
            } else if (directive == "#empty_field") {
144✔
842
                this->blf_empty_field = intern_string::lookup(*iter);
24✔
843
            } else if (directive == "#unset_field") {
120✔
844
                this->blf_unset_field = intern_string::lookup(*iter);
24✔
845
            } else if (directive == "#path") {
96✔
846
                auto full_name = fmt::format(FMT_STRING("bro_{}_log"), *iter);
72✔
847
                this->blf_format_name = intern_string::lookup(full_name);
24✔
848
            } else if (directive == "#fields" && this->blf_field_defs.empty()) {
96✔
849
                do {
850
                    auto field_name
851
                        = intern_string::lookup("bro_" + sql_safe_ident(*iter));
680✔
852
                    auto common_iter = FIELD_META.find(field_name);
680✔
853
                    if (common_iter == FIELD_META.end()) {
680✔
854
                        FIELD_META.emplace(field_name,
674✔
855
                                           logline_value_meta{
1,348✔
856
                                               field_name,
857
                                               value_kind_t::VALUE_TEXT,
858
                                           });
859
                    }
860
                    this->blf_field_defs.emplace_back(
1,360✔
861
                        field_name, this->blf_field_defs.size(), this);
680✔
862
                    ++iter;
680✔
863
                } while (iter != ss.end());
680✔
864
            } else if (directive == "#types") {
48✔
865
                static const char* KNOWN_IDS[] = {
866
                    "bro_conn_uids",
867
                    "bro_fuid",
868
                    "bro_host",
869
                    "bro_info_code",
870
                    "bro_method",
871
                    "bro_mime_type",
872
                    "bro_orig_fuids",
873
                    "bro_parent_fuid",
874
                    "bro_proto",
875
                    "bro_referrer",
876
                    "bro_resp_fuids",
877
                    "bro_service",
878
                    "bro_uid",
879
                    "bro_uri",
880
                    "bro_user_agent",
881
                    "bro_username",
882
                };
883
                static const char* KNOWN_FOREIGN[] = {
884
                    "bro_status_code",
885
                };
886

887
                int numeric_count = 0;
24✔
888

889
                do {
890
                    string_fragment field_type = *iter;
680✔
891
                    auto& fd = this->blf_field_defs[iter.index() - 1];
680✔
892

893
                    if (field_type == "time") {
680✔
894
                        fd.with_kind(value_kind_t::VALUE_TIMESTAMP);
48✔
895
                    } else if (field_type == "string") {
656✔
896
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
500✔
897
                                                        std::end(KNOWN_IDS),
898
                                                        fd.fd_meta.lvm_name);
250✔
899
                        fd.with_kind(value_kind_t::VALUE_TEXT, ident);
500✔
900
                    } else if (field_type == "count") {
406✔
901
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
248✔
902
                                                        std::end(KNOWN_IDS),
903
                                                        fd.fd_meta.lvm_name);
124✔
904
                        bool foreign
905
                            = std::binary_search(std::begin(KNOWN_FOREIGN),
248✔
906
                                                 std::end(KNOWN_FOREIGN),
907
                                                 fd.fd_meta.lvm_name);
124✔
908
                        fd.with_kind(
248✔
909
                              value_kind_t::VALUE_INTEGER, ident, foreign)
910
                            .with_numeric_index(numeric_count);
124✔
911
                        numeric_count += 1;
124✔
912
                    } else if (field_type == "bool") {
282✔
913
                        fd.with_kind(value_kind_t::VALUE_BOOLEAN);
8✔
914
                    } else if (field_type == "addr") {
278✔
915
                        fd.with_kind(
96✔
916
                            value_kind_t::VALUE_TEXT, true, false, "ipaddress");
917
                    } else if (field_type == "port") {
230✔
918
                        fd.with_kind(value_kind_t::VALUE_INTEGER, true);
96✔
919
                    } else if (field_type == "interval") {
182✔
920
                        fd.with_kind(value_kind_t::VALUE_FLOAT)
4✔
921
                            .with_numeric_index(numeric_count);
2✔
922
                        numeric_count += 1;
2✔
923
                    }
924

925
                    ++iter;
680✔
926
                } while (iter != ss.end());
680✔
927
            }
928
        }
168✔
929

930
        if (!this->blf_format_name.empty() && !this->blf_separator.empty()
48✔
931
            && !this->blf_field_defs.empty())
48✔
932
        {
933
            return this->scan_int(dst, li, sbr, sbc);
24✔
934
        }
935

936
        this->blf_format_name.clear();
×
937

938
        return scan_no_match{"no header found"};
×
939
    }
1,889✔
940

941
    void annotate(logfile* lf,
30,358✔
942
                  uint64_t line_number,
943
                  string_attrs_t& sa,
944
                  logline_value_vector& values) const override
945
    {
946
        static const intern_string_t UID = intern_string::lookup("bro_uid");
30,398✔
947

948
        auto& sbr = values.lvv_sbr;
30,358✔
949
        separated_string ss(sbr.get_data(), sbr.length());
30,358✔
950

951
        ss.with_separator(this->blf_separator.get());
30,358✔
952

953
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
681,188✔
954
            if (iter.index() >= this->blf_field_defs.size()) {
651,037✔
955
                return;
207✔
956
            }
957

958
            const field_def& fd = this->blf_field_defs[iter.index()];
650,830✔
959
            string_fragment sf = *iter;
650,830✔
960

961
            if (sf == this->blf_empty_field) {
650,830✔
962
                sf.clear();
30,161✔
963
            } else if (sf == this->blf_unset_field) {
620,669✔
964
                sf.invalidate();
74,315✔
965
            }
966

967
            auto lr = line_range(sf.sf_begin, sf.sf_end);
650,830✔
968

969
            if (fd.fd_meta.lvm_name == TS) {
650,830✔
970
                sa.emplace_back(lr, L_TIMESTAMP.value());
30,358✔
971
            } else if (fd.fd_meta.lvm_name == UID) {
620,472✔
972
                sa.emplace_back(lr, L_OPID.value());
30,358✔
973
                values.lvv_opid_value = sf.to_string();
30,358✔
974
                values.lvv_opid_provenance
975
                    = logline_value_vector::opid_provenance::file;
30,358✔
976
            }
977

978
            if (lr.is_valid()) {
650,830✔
979
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
576,515✔
980
            } else {
981
                values.lvv_values.emplace_back(fd.fd_meta);
74,315✔
982
            }
983
            values.lvv_values.back().lv_meta.lvm_user_hidden
650,830✔
984
                = fd.fd_root_meta->lvm_user_hidden;
650,830✔
985
        }
986

987
        log_format::annotate(lf, line_number, sa, values);
30,151✔
988
    }
989

990
    std::optional<size_t> stats_index_for_value(
36✔
991
        const intern_string_t& name) const override
992
    {
993
        for (const auto& blf_field_def : this->blf_field_defs) {
540✔
994
            if (blf_field_def.fd_meta.lvm_name == name) {
540✔
995
                if (!blf_field_def.fd_numeric_index) {
36✔
996
                    break;
×
997
                }
998
                return blf_field_def.fd_numeric_index.value();
36✔
999
            }
1000
        }
1001

1002
        return std::nullopt;
×
1003
    }
1004

1005
    /**
     * Set the user-hidden flag on the shared metadata for a field.
     *
     * @param field_name The field to change; LOG_TIME_STR is treated as an
     * alias for the timestamp field (TS).
     * @param val The new value for the hidden flag.
     * @return True if the field is in FIELD_META and was updated, false
     * otherwise.
     */
    bool hide_field(intern_string_t field_name, bool val) override
    {
        if (field_name == LOG_TIME_STR) {
            field_name = TS;
        }

        const auto meta_iter = FIELD_META.find(field_name);
        const bool known = meta_iter != FIELD_META.end();

        if (known) {
            meta_iter->second.lvm_user_hidden = val;
        }

        return known;
    }
1020

1021
    std::map<intern_string_t, logline_value_meta> get_field_states() override
65✔
1022
    {
1023
        std::map<intern_string_t, logline_value_meta> retval;
65✔
1024

1025
        for (const auto& fd : FIELD_META) {
181✔
1026
            retval.emplace(fd.first, fd.second);
116✔
1027
        }
1028

1029
        return retval;
65✔
1030
    }
×
1031

1032
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
24✔
1033
    {
1034
        auto retval = std::make_shared<bro_log_format>(*this);
24✔
1035

1036
        retval->lf_specialized = true;
24✔
1037
        for (auto& fd : retval->blf_field_defs) {
704✔
1038
            fd.fd_meta.lvm_format = retval.get();
680✔
1039
        }
1040
        return retval;
48✔
1041
    }
24✔
1042

1043
    class bro_log_table : public log_format_vtab_impl {
1044
    public:
1045
        explicit bro_log_table(std::shared_ptr<const log_format> format)
22✔
1046
            : log_format_vtab_impl(format),
22✔
1047
              blt_format(dynamic_cast<const bro_log_format*>(format.get()))
22✔
1048
        {
1049
        }
22✔
1050

1051
        void get_columns(std::vector<vtab_column>& cols) const override
31✔
1052
        {
1053
            for (const auto& fd : this->blt_format->blf_field_defs) {
914✔
1054
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
883✔
1055
                    fd.fd_meta.lvm_kind);
883✔
1056

1057
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
883✔
1058
                                  type_pair.first,
1059
                                  fd.fd_collator,
883✔
1060
                                  false,
1,766✔
1061
                                  "",
1062
                                  type_pair.second);
1063
            }
1064
        }
31✔
1065

1066
        void get_foreign_keys(
11✔
1067
            std::unordered_set<std::string>& keys_inout) const override
1068
        {
1069
            this->log_vtab_impl::get_foreign_keys(keys_inout);
11✔
1070

1071
            for (const auto& fd : this->blt_format->blf_field_defs) {
322✔
1072
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
311✔
1073
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
136✔
1074
                }
1075
            }
1076
        }
11✔
1077

1078
        const bro_log_format* blt_format;
1079
    };
1080

1081
    /**
     * @return The map of tables created so far, keyed by format name.  The
     * map is a function-local static, so the same object is returned on
     * every call.
     */
    static std::map<intern_string_t, std::shared_ptr<bro_log_table>>&
    get_tables()
    {
        static std::map<intern_string_t, std::shared_ptr<bro_log_table>>
            TABLES;

        return TABLES;
    }
1088

1089
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
734✔
1090
    {
1091
        if (this->blf_format_name.empty()) {
734✔
1092
            return nullptr;
712✔
1093
        }
1094

1095
        std::shared_ptr<bro_log_table> retval = nullptr;
22✔
1096

1097
        auto& tables = get_tables();
22✔
1098
        const auto iter = tables.find(this->blf_format_name);
22✔
1099
        if (iter == tables.end()) {
22✔
1100
            retval = std::make_shared<bro_log_table>(this->shared_from_this());
22✔
1101
            tables[this->blf_format_name] = retval;
22✔
1102
        }
1103

1104
        return retval;
22✔
1105
    }
22✔
1106

1107
    void get_subline(const log_format_file_state& lffs,
34,939✔
1108
                     const logline& ll,
1109
                     shared_buffer_ref& sbr,
1110
                     subline_options opts) override
1111
    {
1112
    }
34,939✔
1113

1114
    intern_string_t blf_format_name;
1115
    intern_string_t blf_separator;
1116
    intern_string_t blf_set_separator;
1117
    intern_string_t blf_empty_field;
1118
    intern_string_t blf_unset_field;
1119
    std::vector<field_def> blf_field_defs;
1120
};
1121

1122
std::unordered_map<const intern_string_t, logline_value_meta>
1123
    bro_log_format::FIELD_META;
1124

1125
const intern_string_t bro_log_format::TS = intern_string::lookup("bro_ts");
1126
const intern_string_t bro_log_format::DURATION
1127
    = intern_string::lookup("bro_duration");
1128

1129
struct ws_separated_string {
1130
    const char* ss_str;
1131
    size_t ss_len;
1132

1133
    explicit ws_separated_string(const char* str = nullptr, size_t len = -1)
23,105✔
1134
        : ss_str(str), ss_len(len)
23,105✔
1135
    {
1136
    }
23,105✔
1137

1138
    struct iterator {
1139
        enum class state_t {
1140
            NORMAL,
1141
            QUOTED,
1142
        };
1143

1144
        const ws_separated_string& i_parent;
1145
        const char* i_pos;
1146
        const char* i_next_pos;
1147
        size_t i_index{0};
1148
        state_t i_state{state_t::NORMAL};
1149

1150
        iterator(const ws_separated_string& ss, const char* pos)
71,916✔
1151
            : i_parent(ss), i_pos(pos), i_next_pos(pos)
71,916✔
1152
        {
1153
            this->update();
71,916✔
1154
        }
71,916✔
1155

1156
        void update()
117,954✔
1157
        {
1158
            const auto& ss = this->i_parent;
117,954✔
1159
            bool done = false;
117,954✔
1160

1161
            while (!done && this->i_next_pos < (ss.ss_str + ss.ss_len)) {
850,108✔
1162
                switch (this->i_state) {
732,154✔
1163
                    case state_t::NORMAL:
725,685✔
1164
                        if (*this->i_next_pos == '"') {
725,685✔
1165
                            this->i_state = state_t::QUOTED;
237✔
1166
                        } else if (isspace(*this->i_next_pos)) {
725,448✔
1167
                            done = true;
60,458✔
1168
                        }
1169
                        break;
725,685✔
1170
                    case state_t::QUOTED:
6,469✔
1171
                        if (*this->i_next_pos == '"') {
6,469✔
1172
                            this->i_state = state_t::NORMAL;
237✔
1173
                        }
1174
                        break;
6,469✔
1175
                }
1176
                if (!done) {
732,154✔
1177
                    this->i_next_pos += 1;
671,696✔
1178
                }
1179
            }
1180
        }
117,954✔
1181

1182
        iterator& operator++()
46,038✔
1183
        {
1184
            const auto& ss = this->i_parent;
46,038✔
1185

1186
            this->i_pos = this->i_next_pos;
46,038✔
1187
            while (this->i_pos < (ss.ss_str + ss.ss_len)
46,038✔
1188
                   && isspace(*this->i_pos))
89,470✔
1189
            {
1190
                this->i_pos += 1;
43,432✔
1191
                this->i_next_pos += 1;
43,432✔
1192
            }
1193
            this->update();
46,038✔
1194
            this->i_index += 1;
46,038✔
1195

1196
            return *this;
46,038✔
1197
        }
1198

1199
        string_fragment operator*()
63,797✔
1200
        {
1201
            const auto& ss = this->i_parent;
63,797✔
1202
            int end = this->i_next_pos - ss.ss_str;
63,797✔
1203

1204
            return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end);
63,797✔
1205
        }
1206

1207
        bool operator==(const iterator& other) const
48,811✔
1208
        {
1209
            return (&this->i_parent == &other.i_parent)
48,811✔
1210
                && (this->i_pos == other.i_pos);
48,811✔
1211
        }
1212

1213
        bool operator!=(const iterator& other) const
46,030✔
1214
        {
1215
            return !(*this == other);
46,030✔
1216
        }
1217

1218
        size_t index() const { return this->i_index; }
86,530✔
1219
    };
1220

1221
    iterator begin() { return {*this, this->ss_str}; }
23,105✔
1222

1223
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
48,811✔
1224
};
1225

1226
class w3c_log_format : public log_format {
1227
public:
1228
    static const intern_string_t F_DATE;
1229
    static const intern_string_t F_TIME;
1230

1231
    struct field_def {
1232
        const intern_string_t fd_name;
1233
        logline_value_meta fd_meta;
1234
        logline_value_meta* fd_root_meta{nullptr};
1235
        std::string fd_collator;
1236
        std::optional<size_t> fd_numeric_index;
1237

1238
        explicit field_def(const intern_string_t name)
18✔
1239
            : fd_name(name), fd_meta(intern_string::lookup(sql_safe_ident(
36✔
1240
                                         name.to_string_fragment())),
36✔
1241
                                     value_kind_t::VALUE_TEXT)
18✔
1242
        {
1243
        }
18✔
1244

1245
        field_def(const intern_string_t name, logline_value_meta meta)
66✔
1246
            : fd_name(name), fd_meta(meta)
66✔
1247
        {
1248
        }
66✔
1249

1250
        field_def(size_t col,
11,008✔
1251
                  const char* name,
1252
                  value_kind_t kind,
1253
                  bool ident = false,
1254
                  bool foreign_key = false,
1255
                  std::string coll = "")
1256
            : fd_name(intern_string::lookup(name)),
22,016✔
1257
              fd_meta(
22,016✔
1258
                  intern_string::lookup(sql_safe_ident(string_fragment(name))),
22,016✔
1259
                  kind,
1260
                  logline_value_meta::table_column{col}),
11,008✔
1261
              fd_collator(std::move(coll))
11,008✔
1262
        {
1263
            this->fd_meta.lvm_identifier = ident;
11,008✔
1264
            this->fd_meta.lvm_foreign_key = foreign_key;
11,008✔
1265
        }
11,008✔
1266

1267
        field_def& with_kind(value_kind_t kind,
1268
                             bool identifier = false,
1269
                             const std::string& collator = "")
1270
        {
1271
            this->fd_meta.lvm_kind = kind;
1272
            this->fd_meta.lvm_identifier = identifier;
1273
            this->fd_collator = collator;
1274
            return *this;
1275
        }
1276

1277
        field_def& with_numeric_index(int index)
50✔
1278
        {
1279
            this->fd_numeric_index = index;
50✔
1280
            return *this;
50✔
1281
        }
1282
    };
1283

1284
    static std::unordered_map<const intern_string_t, logline_value_meta>
1285
        FIELD_META;
1286

1287
    struct field_to_struct_t {
1288
        field_to_struct_t(const char* prefix, const char* struct_name)
2,752✔
1289
            : fs_prefix(prefix),
2,752✔
1290
              fs_struct_name(intern_string::lookup(struct_name))
5,504✔
1291
        {
1292
        }
2,752✔
1293

1294
        const char* fs_prefix;
1295
        intern_string_t fs_struct_name;
1296
    };
1297

1298
    static const std::array<field_def, 16>& get_known_fields()
703✔
1299
    {
1300
        static size_t KNOWN_FIELD_INDEX = 0;
1301
        static const std::array<field_def, 16> RETVAL = {
1302
            field_def{
1303
                KNOWN_FIELD_INDEX++,
1304
                "cs-method",
1305
                value_kind_t::VALUE_TEXT,
1306
                true,
1307
            },
1308
            {
1309
                KNOWN_FIELD_INDEX++,
1310
                "c-ip",
1311
                value_kind_t::VALUE_TEXT,
1312
                true,
1313
                false,
1314
                "ipaddress",
1315
            },
1316
            {
1317
                KNOWN_FIELD_INDEX++,
1318
                "cs-bytes",
1319
                value_kind_t::VALUE_INTEGER,
1320
                false,
1321
            },
1322
            {
1323
                KNOWN_FIELD_INDEX++,
1324
                "cs-host",
1325
                value_kind_t::VALUE_TEXT,
1326
                true,
1327
            },
1328
            {
1329
                KNOWN_FIELD_INDEX++,
1330
                "cs-uri-stem",
1331
                value_kind_t::VALUE_TEXT,
1332
                true,
1333
                false,
1334
                "naturalnocase",
1335
            },
1336
            {
1337
                KNOWN_FIELD_INDEX++,
1338
                "cs-uri-query",
1339
                value_kind_t::VALUE_TEXT,
1340
                false,
1341
            },
1342
            {
1343
                KNOWN_FIELD_INDEX++,
1344
                "cs-username",
1345
                value_kind_t::VALUE_TEXT,
1346
                false,
1347
            },
1348
            {
1349
                KNOWN_FIELD_INDEX++,
1350
                "cs-version",
1351
                value_kind_t::VALUE_TEXT,
1352
                true,
1353
            },
1354
            {
1355
                KNOWN_FIELD_INDEX++,
1356
                "s-ip",
1357
                value_kind_t::VALUE_TEXT,
1358
                true,
1359
                false,
1360
                "ipaddress",
1361
            },
1362
            {
1363
                KNOWN_FIELD_INDEX++,
1364
                "s-port",
1365
                value_kind_t::VALUE_INTEGER,
1366
                true,
1367
            },
1368
            {
1369
                KNOWN_FIELD_INDEX++,
1370
                "s-computername",
1371
                value_kind_t::VALUE_TEXT,
1372
                true,
1373
            },
1374
            {
1375
                KNOWN_FIELD_INDEX++,
1376
                "s-sitename",
1377
                value_kind_t::VALUE_TEXT,
1378
                true,
1379
            },
1380
            {
1381
                KNOWN_FIELD_INDEX++,
1382
                "sc-bytes",
1383
                value_kind_t::VALUE_INTEGER,
1384
                false,
1385
            },
1386
            {
1387
                KNOWN_FIELD_INDEX++,
1388
                "sc-status",
1389
                value_kind_t::VALUE_INTEGER,
1390
                false,
1391
                true,
1392
            },
1393
            {
1394
                KNOWN_FIELD_INDEX++,
1395
                "sc-substatus",
1396
                value_kind_t::VALUE_INTEGER,
1397
                false,
1398
            },
1399
            {
1400
                KNOWN_FIELD_INDEX++,
1401
                "time-taken",
1402
                value_kind_t::VALUE_FLOAT,
1403
                false,
1404
            },
1405
        };
2,079✔
1406

1407
        return RETVAL;
703✔
1408
    }
1409

1410
    static const std::array<field_to_struct_t, 4>& get_known_struct_fields()
700✔
1411
    {
1412
        static const std::array<field_to_struct_t, 4> RETVAL = {
1413
            field_to_struct_t{"cs(", "cs_headers"},
1414
            {"sc(", "sc_headers"},
1415
            {"rs(", "rs_headers"},
1416
            {"sr(", "sr_headers"},
1417
        };
700✔
1418

1419
        return RETVAL;
700✔
1420
    }
1421

1422
    w3c_log_format()
833✔
1423
    {
833✔
1424
        this->lf_is_self_describing = true;
833✔
1425
        this->lf_time_ordered = false;
833✔
1426
        this->lf_structured = true;
833✔
1427
    }
833✔
1428

1429
    /**
     * @return The current format name, or "w3c_log" while no name has been
     * set.
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t DEFAULT_NAME(
            intern_string::lookup("w3c_log"));

        if (this->wlf_format_name.empty()) {
            return DEFAULT_NAME;
        }

        return this->wlf_format_name;
    }
1435

1436
    void clear() override
15,171✔
1437
    {
1438
        this->log_format::clear();
15,171✔
1439
        this->wlf_time_scanner.clear();
15,171✔
1440
        this->wlf_format_name.clear();
15,171✔
1441
        this->wlf_field_defs.clear();
15,171✔
1442
    }
15,171✔
1443

1444
    std::vector<logline_value_meta> get_value_metadata() const override
×
1445
    {
1446
        std::vector<logline_value_meta> retval;
×
1447

1448
        for (const auto& fd : this->wlf_field_defs) {
×
1449
            retval.emplace_back(fd.fd_meta);
×
1450
        }
1451
        return retval;
×
1452
    }
×
1453

1454
    scan_result_t scan_int(std::vector<logline>& dst,
1,309✔
1455
                           const line_info& li,
1456
                           shared_buffer_ref& sbr,
1457
                           scan_batch_context& sbc)
1458
    {
1459
        static const intern_string_t F_DATE_LOCAL
1460
            = intern_string::lookup("date-local");
1,337✔
1461
        static const intern_string_t F_DATE_UTC
1462
            = intern_string::lookup("date-UTC");
1,337✔
1463
        static const intern_string_t F_TIME_LOCAL
1464
            = intern_string::lookup("time-local");
1,337✔
1465
        static const intern_string_t F_TIME_UTC
1466
            = intern_string::lookup("time-UTC");
1,337✔
1467
        static const intern_string_t F_STATUS_CODE
1468
            = intern_string::lookup("sc-status");
1,337✔
1469

1470
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,309✔
1471
        timeval date_tv{0, 0}, time_tv{0, 0};
1,309✔
1472
        exttm date_tm, time_tm;
1,309✔
1473
        size_t found_date = 0;
1,309✔
1474
        size_t found_time = 0;
1,309✔
1475
        log_level_t level = LEVEL_INFO;
1,309✔
1476

1477
        sbc.sbc_value_stats.resize(this->wlf_field_defs.size());
1,309✔
1478
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
19,676✔
1479
            if (iter.index() >= this->wlf_field_defs.size()) {
18,575✔
1480
                level = LEVEL_INVALID;
×
1481
                break;
×
1482
            }
1483

1484
            const auto& fd = this->wlf_field_defs[iter.index()];
18,575✔
1485
            string_fragment sf = *iter;
18,575✔
1486

1487
            if (sf.startswith("#")) {
18,575✔
1488
                if (sf == "#Date:") {
208✔
1489
                    auto sbr_sf_opt
1490
                        = sbr.to_string_fragment().consume_n(sf.length());
52✔
1491

1492
                    if (sbr_sf_opt) {
52✔
1493
                        auto sbr_sf = sbr_sf_opt.value().trim();
52✔
1494
                        date_time_scanner dts;
52✔
1495
                        exttm tm;
52✔
1496
                        timeval tv;
1497

1498
                        if (dts.scan(sbr_sf.data(),
52✔
1499
                                     sbr_sf.length(),
52✔
1500
                                     nullptr,
1501
                                     &tm,
1502
                                     tv))
1503
                        {
1504
                            this->lf_date_time.set_base_time(tv.tv_sec,
52✔
1505
                                                             tm.et_tm);
1506
                            this->wlf_time_scanner.set_base_time(tv.tv_sec,
52✔
1507
                                                                 tm.et_tm);
1508
                        }
1509
                    }
1510
                }
1511
                dst.emplace_back(li.li_file_range.fr_offset,
208✔
1512
                                 std::chrono::microseconds{0},
×
1513
                                 LEVEL_UNKNOWN);
208✔
1514
                dst.back().set_ignore(true);
208✔
1515
                return scan_match{2000};
208✔
1516
            }
1517

1518
            sf = sf.trim("\" \t");
18,367✔
1519
            if (F_DATE == fd.fd_name || F_DATE_LOCAL == fd.fd_name
35,674✔
1520
                || F_DATE_UTC == fd.fd_name)
35,674✔
1521
            {
1522
                if (this->lf_date_time.scan(
1,068✔
1523
                        sf.data(), sf.length(), nullptr, &date_tm, date_tv))
1,068✔
1524
                {
1525
                    this->lf_timestamp_flags |= date_tm.et_flags;
1,068✔
1526
                    found_date += 1;
1,068✔
1527
                }
1528
            } else if (F_TIME == fd.fd_name || F_TIME_LOCAL == fd.fd_name
33,508✔
1529
                       || F_TIME_UTC == fd.fd_name)
33,508✔
1530
            {
1531
                if (this->wlf_time_scanner.scan(
1,098✔
1532
                        sf.data(), sf.length(), nullptr, &time_tm, time_tv))
1,098✔
1533
                {
1534
                    this->lf_timestamp_flags |= time_tm.et_flags;
1,098✔
1535
                    found_time += 1;
1,098✔
1536
                }
1537
            } else if (F_STATUS_CODE == fd.fd_name) {
16,201✔
1538
                if (!sf.empty() && sf[0] >= '4') {
1,098✔
1539
                    level = LEVEL_ERROR;
1,018✔
1540
                }
1541
            }
1542

1543
            if (fd.fd_numeric_index) {
18,367✔
1544
                switch (fd.fd_meta.lvm_kind) {
6,401✔
1545
                    case value_kind_t::VALUE_INTEGER:
6,401✔
1546
                    case value_kind_t::VALUE_FLOAT: {
1547
                        auto scan_float_res
1548
                            = scn::scan_value<double>(sf.to_string_view());
6,401✔
1549

1550
                        if (scan_float_res) {
6,401✔
1551
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
6,397✔
1552
                                .add_value(scan_float_res->value());
6,397✔
1553
                        }
1554
                        break;
6,401✔
1555
                    }
1556
                    default:
×
1557
                        break;
×
1558
                }
1559
            }
1560
        }
1561

1562
        if (found_time == 1 && found_date <= 1) {
1,101✔
1563
            auto tm = time_tm;
1,098✔
1564

1565
            if (found_date) {
1,098✔
1566
                tm.et_tm.tm_year = date_tm.et_tm.tm_year;
1,068✔
1567
                tm.et_tm.tm_mday = date_tm.et_tm.tm_mday;
1,068✔
1568
                tm.et_tm.tm_mon = date_tm.et_tm.tm_mon;
1,068✔
1569
                tm.et_tm.tm_wday = date_tm.et_tm.tm_wday;
1,068✔
1570
                tm.et_tm.tm_yday = date_tm.et_tm.tm_yday;
1,068✔
1571
            }
1572

1573
            auto tv = tm.to_timeval();
1,098✔
1574
            if (!this->lf_specialized) {
1,098✔
1575
                for (auto& ll : dst) {
66✔
1576
                    ll.set_ignore(true);
54✔
1577
                }
1578
            }
1579
            dst.emplace_back(li.li_file_range.fr_offset, to_us(tv), level);
1,098✔
1580
            return scan_match{2000};
1,098✔
1581
        }
1582

1583
        return scan_no_match{"no header found"};
3✔
1584
    }
1585

1586
    scan_result_t scan(logfile& lf,
13,081✔
1587
                       std::vector<logline>& dst,
1588
                       const line_info& li,
1589
                       shared_buffer_ref& sbr,
1590
                       scan_batch_context& sbc) override
1591
    {
1592
        static const auto* W3C_LOG_NAME = intern_string::lookup("w3c_log");
14,457✔
1593
        static const auto* X_FIELDS_NAME = intern_string::lookup("x_fields");
14,457✔
1594
        static const auto& KNOWN_FIELDS = get_known_fields();
13,081✔
1595
        static const auto& KNOWN_STRUCT_FIELDS = get_known_struct_fields();
13,081✔
1596
        static auto X_FIELDS_IDX = 0;
1597

1598
        if (li.li_partial) {
13,081✔
1599
            return scan_incomplete{};
27✔
1600
        }
1601

1602
        if (dst.empty()) {
13,054✔
1603
            auto file_options = lf.get_file_options();
1,279✔
1604

1605
            if (file_options) {
1,279✔
1606
                this->lf_date_time.dts_default_zone
1607
                    = file_options->second.fo_default_zone.pp_value;
57✔
1608
            } else {
1609
                this->lf_date_time.dts_default_zone = nullptr;
1,222✔
1610
            }
1611
        }
1,279✔
1612

1613
        if (!this->wlf_format_name.empty()) {
13,054✔
1614
            return this->scan_int(dst, li, sbr, sbc);
1,294✔
1615
        }
1616

1617
        if (dst.size() <= 2 || dst.size() > 20 || sbr.empty()
21,053✔
1618
            || sbr.get_data()[0] == '#')
21,053✔
1619
        {
1620
            return scan_no_match{"no header found"};
9,650✔
1621
        }
1622

1623
        this->clear();
2,110✔
1624

1625
        for (auto line_iter = dst.begin(); line_iter != dst.end(); ++line_iter)
22,442✔
1626
        {
1627
            auto next_read_result = lf.read_line(line_iter);
20,332✔
1628

1629
            if (next_read_result.isErr()) {
20,332✔
1630
                return scan_no_match{"unable to read first line"};
×
1631
            }
1632

1633
            auto line = next_read_result.unwrap();
20,332✔
1634
            ws_separated_string ss(line.get_data(), line.length());
20,332✔
1635
            auto iter = ss.begin();
20,332✔
1636
            const auto directive = *iter;
20,332✔
1637

1638
            if (directive.empty() || directive[0] != '#') {
20,332✔
1639
                continue;
17,551✔
1640
            }
1641

1642
            ++iter;
2,781✔
1643
            if (iter == ss.end()) {
2,781✔
1644
                continue;
38✔
1645
            }
1646

1647
            if (directive == "#Date:") {
2,743✔
1648
                date_time_scanner dts;
10✔
1649
                struct exttm tm;
10✔
1650
                struct timeval tv;
1651

1652
                if (dts.scan(line.get_data_at(directive.length() + 1),
10✔
1653
                             line.length() - directive.length() - 1,
10✔
1654
                             nullptr,
1655
                             &tm,
1656
                             tv))
1657
                {
1658
                    this->lf_date_time.set_base_time(tv.tv_sec, tm.et_tm);
10✔
1659
                    this->wlf_time_scanner.set_base_time(tv.tv_sec, tm.et_tm);
10✔
1660
                }
1661
            } else if (directive == "#Fields:" && this->wlf_field_defs.empty())
2,733✔
1662
            {
1663
                int numeric_count = 0;
15✔
1664

1665
                do {
1666
                    auto sf = (*iter).trim(")");
200✔
1667

1668
                    auto field_iter = std::find_if(
600✔
1669
                        begin(KNOWN_FIELDS),
1670
                        end(KNOWN_FIELDS),
1671
                        [&sf](auto elem) { return sf == elem.fd_name; });
2,253✔
1672
                    if (field_iter != end(KNOWN_FIELDS)) {
400✔
1673
                        this->wlf_field_defs.emplace_back(*field_iter);
116✔
1674
                        auto& fd = this->wlf_field_defs.back();
116✔
1675
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
116✔
1676
                        if (common_iter == FIELD_META.end()) {
116✔
1677
                            auto emp_res = FIELD_META.emplace(
116✔
1678
                                fd.fd_meta.lvm_name, fd.fd_meta);
116✔
1679
                            common_iter = emp_res.first;
116✔
1680
                        }
1681
                        fd.fd_root_meta = &common_iter->second;
116✔
1682
                    } else if (sf.is_one_of("date", "time")) {
84✔
1683
                        this->wlf_field_defs.emplace_back(
36✔
1684
                            intern_string::lookup(sf));
18✔
1685
                        auto& fd = this->wlf_field_defs.back();
18✔
1686
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
18✔
1687
                        if (common_iter == FIELD_META.end()) {
18✔
1688
                            auto emp_res = FIELD_META.emplace(
18✔
1689
                                fd.fd_meta.lvm_name, fd.fd_meta);
18✔
1690
                            common_iter = emp_res.first;
18✔
1691
                        }
1692
                        fd.fd_root_meta = &common_iter->second;
18✔
1693
                    } else {
1694
                        const auto fs_iter = std::find_if(
198✔
1695
                            begin(KNOWN_STRUCT_FIELDS),
1696
                            end(KNOWN_STRUCT_FIELDS),
1697
                            [&sf](auto elem) {
201✔
1698
                                return sf.startswith(elem.fs_prefix);
201✔
1699
                            });
1700
                        if (fs_iter != end(KNOWN_STRUCT_FIELDS)) {
132✔
1701
                            const intern_string_t field_name
1702
                                = intern_string::lookup(sf.substr(3));
21✔
1703
                            this->wlf_field_defs.emplace_back(
21✔
1704
                                field_name,
1705
                                logline_value_meta(
42✔
1706
                                    field_name,
1707
                                    value_kind_t::VALUE_TEXT,
1708
                                    logline_value_meta::table_column{
×
1709
                                        KNOWN_FIELDS.size() + 1
21✔
1710
                                        + std::distance(
63✔
1711
                                            begin(KNOWN_STRUCT_FIELDS),
1712
                                            fs_iter)},
1713
                                    this)
42✔
1714
                                    .with_struct_name(fs_iter->fs_struct_name));
1715
                        } else {
1716
                            const intern_string_t field_name
1717
                                = intern_string::lookup(sf);
45✔
1718
                            this->wlf_field_defs.emplace_back(
45✔
1719
                                field_name,
1720
                                logline_value_meta(
90✔
1721
                                    field_name,
1722
                                    value_kind_t::VALUE_TEXT,
1723
                                    logline_value_meta::table_column{
×
1724
                                        KNOWN_FIELDS.size() + X_FIELDS_IDX},
90✔
1725
                                    this)
90✔
1726
                                    .with_struct_name(X_FIELDS_NAME));
1727
                        }
1728
                    }
1729
                    auto& fd = this->wlf_field_defs.back();
200✔
1730
                    fd.fd_meta.lvm_format = std::make_optional(this);
200✔
1731
                    switch (fd.fd_meta.lvm_kind) {
200✔
1732
                        case value_kind_t::VALUE_FLOAT:
50✔
1733
                        case value_kind_t::VALUE_INTEGER:
1734
                            fd.with_numeric_index(numeric_count);
50✔
1735
                            numeric_count += 1;
50✔
1736
                            break;
50✔
1737
                        default:
150✔
1738
                            break;
150✔
1739
                    }
1740

1741
                    ++iter;
200✔
1742
                } while (iter != ss.end());
200✔
1743

1744
                this->wlf_format_name = W3C_LOG_NAME;
15✔
1745
            }
1746
        }
37,921✔
1747

1748
        if (!this->wlf_format_name.empty() && !this->wlf_field_defs.empty()) {
2,110✔
1749
            return this->scan_int(dst, li, sbr, sbc);
15✔
1750
        }
1751

1752
        this->wlf_format_name.clear();
2,095✔
1753

1754
        return scan_no_match{"no header found"};
2,095✔
1755
    }
1756

1757
    void annotate(logfile* lf,
1,464✔
1758
                  uint64_t line_number,
1759
                  string_attrs_t& sa,
1760
                  logline_value_vector& values) const override
1761
    {
1762
        auto& sbr = values.lvv_sbr;
1,464✔
1763
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,464✔
1764
        std::optional<line_range> date_lr;
1,464✔
1765
        std::optional<line_range> time_lr;
1,464✔
1766

1767
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
26,154✔
1768
            auto sf = *iter;
24,690✔
1769

1770
            if (iter.index() >= this->wlf_field_defs.size()) {
24,690✔
1771
                sa.emplace_back(line_range{sf.sf_begin, -1},
×
NEW
1772
                                SA_INVALID.value("extra fields detected"s));
×
1773
                return;
×
1774
            }
1775

1776
            const auto& fd = this->wlf_field_defs[iter.index()];
24,690✔
1777

1778
            if (sf == "-") {
24,690✔
1779
                sf.invalidate();
4,300✔
1780
            }
1781

1782
            auto lr = line_range(sf.sf_begin, sf.sf_end);
24,690✔
1783

1784
            if (lr.is_valid()) {
24,690✔
1785
                if (fd.fd_meta.lvm_name == F_DATE) {
20,390✔
1786
                    date_lr = lr;
1,442✔
1787
                } else if (fd.fd_meta.lvm_name == F_TIME) {
18,948✔
1788
                    time_lr = lr;
1,456✔
1789
                }
1790
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
20,390✔
1791
                if (sf.startswith("\"")) {
20,390✔
1792
                    auto& meta = values.lvv_values.back().lv_meta;
28✔
1793

1794
                    if (meta.lvm_kind == value_kind_t::VALUE_TEXT) {
28✔
1795
                        meta.lvm_kind = value_kind_t::VALUE_W3C_QUOTED;
26✔
1796
                    } else {
1797
                        meta.lvm_kind = value_kind_t::VALUE_NULL;
2✔
1798
                    }
1799
                }
1800
            } else {
1801
                values.lvv_values.emplace_back(fd.fd_meta);
4,300✔
1802
            }
1803
            if (fd.fd_root_meta != nullptr) {
24,690✔
1804
                values.lvv_values.back().lv_meta.lvm_user_hidden
20,314✔
1805
                    = fd.fd_root_meta->lvm_user_hidden;
20,314✔
1806
            }
1807
        }
1808
        if (time_lr) {
1,464✔
1809
            auto ts_lr = time_lr.value();
1,456✔
1810
            if (date_lr) {
1,456✔
1811
                if (date_lr->lr_end + 1 == time_lr->lr_start) {
1,442✔
1812
                    ts_lr.lr_start = date_lr->lr_start;
1,442✔
1813
                    ts_lr.lr_end = time_lr->lr_end;
1,442✔
1814
                }
1815
            }
1816

1817
            sa.emplace_back(ts_lr, L_TIMESTAMP.value());
1,456✔
1818
        }
1819
        log_format::annotate(lf, line_number, sa, values);
1,464✔
1820
    }
1821

1822
    std::optional<size_t> stats_index_for_value(
×
1823
        const intern_string_t& name) const override
1824
    {
1825
        for (const auto& wlf_field_def : this->wlf_field_defs) {
×
1826
            if (wlf_field_def.fd_meta.lvm_name == name) {
×
1827
                if (!wlf_field_def.fd_numeric_index) {
×
1828
                    break;
×
1829
                }
1830
                return wlf_field_def.fd_numeric_index.value();
×
1831
            }
1832
        }
1833

1834
        return std::nullopt;
×
1835
    }
1836

1837
    bool hide_field(const intern_string_t field_name, bool val) override
×
1838
    {
1839
        if (field_name == LOG_TIME_STR) {
×
1840
            auto date_iter = FIELD_META.find(F_DATE);
×
1841
            auto time_iter = FIELD_META.find(F_TIME);
×
1842
            if (date_iter == FIELD_META.end() || time_iter == FIELD_META.end())
×
1843
            {
1844
                return false;
×
1845
            }
1846
            date_iter->second.lvm_user_hidden = val;
×
1847
            time_iter->second.lvm_user_hidden = val;
×
1848
            return true;
×
1849
        }
1850

1851
        auto fd_iter = FIELD_META.find(field_name);
×
1852
        if (fd_iter == FIELD_META.end()) {
×
1853
            return false;
×
1854
        }
1855

1856
        fd_iter->second.lvm_user_hidden = val;
×
1857

1858
        return true;
×
1859
    }
1860

1861
    std::map<intern_string_t, logline_value_meta> get_field_states() override
65✔
1862
    {
1863
        std::map<intern_string_t, logline_value_meta> retval;
65✔
1864

1865
        for (const auto& fd : FIELD_META) {
121✔
1866
            retval.emplace(fd.first, fd.second);
56✔
1867
        }
1868

1869
        return retval;
65✔
1870
    }
×
1871

1872
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
12✔
1873
    {
1874
        auto retval = std::make_shared<w3c_log_format>(*this);
12✔
1875

1876
        retval->lf_specialized = true;
12✔
1877
        return retval;
24✔
1878
    }
12✔
1879

1880
    class w3c_log_table : public log_format_vtab_impl {
1881
    public:
1882
        explicit w3c_log_table(std::shared_ptr<const log_format> format)
9✔
1883
            : log_format_vtab_impl(format)
9✔
1884
        {
1885
        }
9✔
1886

1887
        void get_columns(std::vector<vtab_column>& cols) const override
12✔
1888
        {
1889
            for (const auto& fd : get_known_fields()) {
204✔
1890
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
192✔
1891
                    fd.fd_meta.lvm_kind);
192✔
1892

1893
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
192✔
1894
                                  type_pair.first,
1895
                                  fd.fd_collator,
192✔
1896
                                  false,
384✔
1897
                                  "",
1898
                                  type_pair.second);
1899
            }
1900
            cols.emplace_back("x_fields");
12✔
1901
            cols.back().with_comment(
24✔
1902
                "A JSON-object that contains fields that are not first-class "
1903
                "columns");
1904
            for (const auto& fs : get_known_struct_fields()) {
60✔
1905
                cols.emplace_back(fs.fs_struct_name.to_string());
48✔
1906
            }
1907
        }
12✔
1908

1909
        void get_foreign_keys(
3✔
1910
            std::unordered_set<std::string>& keys_inout) const override
1911
        {
1912
            this->log_vtab_impl::get_foreign_keys(keys_inout);
3✔
1913

1914
            for (const auto& fd : get_known_fields()) {
51✔
1915
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
48✔
1916
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
30✔
1917
                }
1918
            }
1919
        }
3✔
1920
    };
1921

1922
    /**
     * @return The function-local static map of the tables created so far,
     * keyed by format name.
     */
    static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>&
    get_tables()
    {
        static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>
            tables_by_name;

        return tables_by_name;
    }
1929

1930
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
721✔
1931
    {
1932
        if (this->wlf_format_name.empty()) {
721✔
1933
            return nullptr;
712✔
1934
        }
1935

1936
        std::shared_ptr<w3c_log_table> retval = nullptr;
9✔
1937

1938
        auto& tables = get_tables();
9✔
1939
        const auto iter = tables.find(this->wlf_format_name);
9✔
1940
        if (iter == tables.end()) {
9✔
1941
            retval = std::make_shared<w3c_log_table>(this->shared_from_this());
9✔
1942
            tables[this->wlf_format_name] = retval;
9✔
1943
        }
1944

1945
        return retval;
9✔
1946
    }
9✔
1947

1948
    // This override has an empty body: none of the arguments are read and
    // `sbr` is left untouched.
    void get_subline(const log_format_file_state& lffs,
1,622✔
1949
                     const logline& ll,
1950
                     shared_buffer_ref& sbr,
1951
                     subline_options opts) override
1952
    {
1953
    }
1,622✔
1954

1955
    // Scanner for the time columns; scan() gives it the base time taken
    // from a "#Date:" directive.
    date_time_scanner wlf_time_scanner;
1956
    // Set by scan() once a "#Fields:" directive was parsed; empty while no
    // header is known.
    intern_string_t wlf_format_name;
1957
    // One definition per column of the "#Fields:" directive, in the order
    // they were listed; annotate() indexes it by column position.
    std::vector<field_def> wlf_field_defs;
1958
};
1959

1960
// Definition of the static metadata map, keyed by field name.  scan() adds
// an entry for each known or date/time field it finds in a header,
// hide_field() updates the hidden flag and get_field_states() copies the
// entries.
std::unordered_map<const intern_string_t, logline_value_meta>
1961
    w3c_log_format::FIELD_META;
1962

1963
// The interned names of the "date" and "time" fields.
const intern_string_t w3c_log_format::F_DATE = intern_string::lookup("date");
1964
const intern_string_t w3c_log_format::F_TIME = intern_string::lookup("time");
1965

1966
struct logfmt_pair_handler {
1967
    explicit logfmt_pair_handler(date_time_scanner& dts) : lph_dt_scanner(dts)
13,061✔
1968
    {
1969
    }
13,061✔
1970

1971
    log_format::scan_result_t process_value(const string_fragment& value_frag)
4,146✔
1972
    {
1973
        if (this->lph_key_frag.is_one_of(
4,146✔
1974
                "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
1975
        {
1976
            if (!this->lph_dt_scanner.scan(value_frag.data(),
49✔
1977
                                           value_frag.length(),
49✔
1978
                                           nullptr,
1979
                                           &this->lph_time_tm,
1980
                                           this->lph_tv))
49✔
1981
            {
1982
                return log_format::scan_no_match{
12✔
1983
                    "timestamp value did not parse correctly"};
12✔
1984
            }
1985
            char buf[1024];
1986
            this->lph_dt_scanner.ftime(
37✔
1987
                buf, sizeof(buf), nullptr, this->lph_time_tm);
37✔
1988
            this->lph_found_time += 1;
37✔
1989
        } else if (this->lph_key_frag.is_one_of("level"_frag, "lvl"_frag)) {
4,097✔
1990
            this->lph_level
1991
                = string2level(value_frag.data(), value_frag.length());
46✔
1992
        }
1993
        return log_format::scan_match{};
4,134✔
1994
    }
1995

1996
    date_time_scanner& lph_dt_scanner;
1997
    size_t lph_found_time{0};
1998
    exttm lph_time_tm;
1999
    timeval lph_tv{0, 0};
2000
    log_level_t lph_level{log_level_t::LEVEL_INFO};
2001
    string_fragment lph_key_frag{""};
2002
};
2003

2004
class logfmt_format : public log_format {
2005
public:
2006
    const intern_string_t get_name() const override
16,129✔
2007
    {
2008
        const static intern_string_t NAME = intern_string::lookup("logfmt_log");
17,783✔
2009

2010
        return NAME;
16,129✔
2011
    }
2012

2013
    class logfmt_log_table : public log_format_vtab_impl {
2014
    public:
2015
        logfmt_log_table(std::shared_ptr<const log_format> format)
712✔
2016
            : log_format_vtab_impl(format)
712✔
2017
        {
2018
        }
712✔
2019

2020
        void get_columns(std::vector<vtab_column>& cols) const override
713✔
2021
        {
2022
            static const auto FIELDS = std::string("fields");
2,137✔
2023

2024
            cols.emplace_back(FIELDS);
713✔
2025
        }
713✔
2026
    };
2027

2028
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
712✔
2029
    {
2030
        static auto retval
2031
            = std::make_shared<logfmt_log_table>(this->shared_from_this());
712✔
2032

2033
        return retval;
712✔
2034
    }
2035

2036
    scan_result_t scan(logfile& lf,
13,061✔
2037
                       std::vector<logline>& dst,
2038
                       const line_info& li,
2039
                       shared_buffer_ref& sbr,
2040
                       scan_batch_context& sbc) override
2041
    {
2042
        auto p = logfmt::parser(sbr.to_string_fragment());
13,061✔
2043
        scan_result_t retval = scan_no_match{};
13,061✔
2044
        bool done = false;
13,061✔
2045
        logfmt_pair_handler lph(this->lf_date_time);
13,061✔
2046

2047
        if (dst.empty()) {
13,061✔
2048
            auto file_options = lf.get_file_options();
1,295✔
2049

2050
            if (file_options) {
1,295✔
2051
                this->lf_date_time.dts_default_zone
2052
                    = file_options->second.fo_default_zone.pp_value;
57✔
2053
            } else {
2054
                this->lf_date_time.dts_default_zone = nullptr;
1,238✔
2055
            }
2056
        }
1,295✔
2057

2058
        while (!done) {
46,559✔
2059
            auto parse_result = p.step();
33,498✔
2060

2061
            auto value_res = parse_result.match(
2062
                [&done](const logfmt::parser::end_of_input&) -> scan_result_t {
×
2063
                    done = true;
12,761✔
2064
                    return scan_match{};
12,761✔
2065
                },
2066
                [](const string_fragment&) -> scan_result_t {
×
2067
                    return scan_incomplete{};
16,303✔
2068
                },
2069
                [&lph](const logfmt::parser::kvpair& kvp) -> scan_result_t {
×
2070
                    lph.lph_key_frag = kvp.first;
4,146✔
2071

2072
                    return kvp.second.match(
2073
                        [](const logfmt::parser::bool_value& bv)
×
2074
                            -> scan_result_t { return scan_match{}; },
×
2075
                        [&lph](const logfmt::parser::float_value& fv)
×
2076
                            -> scan_result_t {
2077
                            return lph.process_value(fv.fv_str_value);
5✔
2078
                        },
2079
                        [&lph](const logfmt::parser::int_value& iv)
×
2080
                            -> scan_result_t {
2081
                            return lph.process_value(iv.iv_str_value);
112✔
2082
                        },
2083
                        [&lph](const logfmt::parser::quoted_value& qv)
×
2084
                            -> scan_result_t {
2085
                            auto_mem<yajl_handle_t> handle(yajl_free);
353✔
2086
                            yajl_callbacks cb;
2087
                            scan_result_t retval;
353✔
2088

2089
                            memset(&cb, 0, sizeof(cb));
353✔
2090
                            handle = yajl_alloc(&cb, nullptr, &lph);
353✔
2091
                            cb.yajl_string = +[](void* ctx,
706✔
2092
                                                 const unsigned char* str,
2093
                                                 size_t len,
2094
                                                 yajl_string_props_t*) -> int {
2095
                                auto& lph = *((logfmt_pair_handler*) ctx);
353✔
2096
                                string_fragment value_frag{str, 0, (int) len};
353✔
2097

2098
                                auto value_res = lph.process_value(value_frag);
353✔
2099
                                return value_res.is<scan_match>();
706✔
2100
                            };
706✔
2101

2102
                            if (yajl_parse(
353✔
2103
                                    handle,
2104
                                    (const unsigned char*) qv.qv_value.data(),
353✔
2105
                                    qv.qv_value.length())
353✔
2106
                                    != yajl_status_ok
2107
                                || yajl_complete_parse(handle)
353✔
2108
                                    != yajl_status_ok)
2109
                            {
2110
                                log_debug("json parsing failed");
×
2111
                                string_fragment unq_frag{
2112
                                    qv.qv_value.sf_string,
×
2113
                                    qv.qv_value.sf_begin + 1,
×
2114
                                    qv.qv_value.sf_end - 1,
×
2115
                                };
2116

2117
                                return lph.process_value(unq_frag);
×
2118
                            }
2119

2120
                            return scan_match{};
353✔
2121
                        },
353✔
2122
                        [&lph](const logfmt::parser::unquoted_value& uv)
4,146✔
2123
                            -> scan_result_t {
2124
                            return lph.process_value(uv.uv_value);
3,676✔
2125
                        });
8,292✔
2126
                },
2127
                [](const logfmt::parser::error& err) -> scan_result_t {
×
2128
                    // log_error("logfmt parse error: %s", err.e_msg.c_str());
2129
                    return scan_no_match{};
288✔
2130
                });
33,498✔
2131
            if (value_res.is<scan_no_match>()) {
33,498✔
2132
                retval = value_res;
300✔
2133
                done = true;
300✔
2134
            }
2135
        }
33,498✔
2136

2137
        if (lph.lph_found_time == 1) {
13,061✔
2138
            this->lf_timestamp_flags = lph.lph_time_tm.et_flags;
37✔
2139
            dst.emplace_back(
37✔
2140
                li.li_file_range.fr_offset, to_us(lph.lph_tv), lph.lph_level);
37✔
2141
            retval = scan_match{500};
37✔
2142
        }
2143

2144
        return retval;
26,122✔
2145
    }
×
2146

2147
    void annotate(logfile* lf,
16✔
2148
                  uint64_t line_number,
2149
                  string_attrs_t& sa,
2150
                  logline_value_vector& values) const override
2151
    {
2152
        static const intern_string_t FIELDS_NAME
2153
            = intern_string::lookup("fields");
22✔
2154

2155
        auto& sbr = values.lvv_sbr;
16✔
2156
        auto p = logfmt::parser(sbr.to_string_fragment());
16✔
2157
        auto done = false;
16✔
2158
        size_t found_body = 0;
16✔
2159

2160
        while (!done) {
133✔
2161
            auto parse_result = p.step();
117✔
2162

2163
            done = parse_result.match(
234✔
2164
                [](const logfmt::parser::end_of_input&) { return true; },
16✔
2165
                [](const string_fragment&) { return false; },
×
2166
                [this, &sa, &values, &found_body](
×
2167
                    const logfmt::parser::kvpair& kvp) {
2168
                    auto value_frag = kvp.second.match(
101✔
2169
                        [this, &kvp, &values](
×
2170
                            const logfmt::parser::bool_value& bv) {
2171
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2172
                                                              kvp.first),
×
2173
                                                          value_kind_t::
2174
                                                              VALUE_INTEGER,
2175
                                                          logline_value_meta::
2176
                                                              table_column{0},
×
2177
                                                          (log_format*) this}
×
2178
                                           .with_struct_name(FIELDS_NAME);
×
2179
                            values.lvv_values.emplace_back(lvm, bv.bv_value);
×
2180
                            values.lvv_values.back().lv_origin
×
2181
                                = to_line_range(bv.bv_str_value);
×
2182

2183
                            return bv.bv_str_value;
×
2184
                        },
×
2185
                        [this, &kvp, &values](
×
2186
                            const logfmt::parser::int_value& iv) {
2187
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2188
                                                              kvp.first),
×
2189
                                                          value_kind_t::
2190
                                                              VALUE_INTEGER,
2191
                                                          logline_value_meta::
2192
                                                              table_column{0},
×
2193
                                                          (log_format*) this}
×
2194
                                           .with_struct_name(FIELDS_NAME);
×
2195
                            values.lvv_values.emplace_back(lvm, iv.iv_value);
×
2196
                            values.lvv_values.back().lv_origin
×
2197
                                = to_line_range(iv.iv_str_value);
×
2198
                            return iv.iv_str_value;
×
2199
                        },
×
2200
                        [this, &kvp, &values](
101✔
2201
                            const logfmt::parser::float_value& fv) {
2202
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2203
                                                              kvp.first),
×
2204
                                                          value_kind_t::
2205
                                                              VALUE_INTEGER,
2206
                                                          logline_value_meta::
2207
                                                              table_column{0},
×
2208
                                                          (log_format*) this}
×
2209
                                           .with_struct_name(FIELDS_NAME);
×
2210
                            values.lvv_values.emplace_back(lvm, fv.fv_value);
×
2211
                            values.lvv_values.back().lv_origin
×
2212
                                = to_line_range(fv.fv_str_value);
×
2213

2214
                            return fv.fv_str_value;
×
2215
                        },
×
2216
                        [](const logfmt::parser::quoted_value& qv) {
×
2217
                            return qv.qv_value;
31✔
2218
                        },
2219
                        [](const logfmt::parser::unquoted_value& uv) {
×
2220
                            return uv.uv_value;
70✔
2221
                        });
2222
                    auto value_lr = to_line_range(value_frag);
101✔
2223

2224
                    auto known_field = false;
101✔
2225
                    if (kvp.first.is_one_of(
101✔
2226
                            "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
2227
                    {
2228
                        sa.emplace_back(value_lr, L_TIMESTAMP.value());
16✔
2229
                        known_field = true;
16✔
2230
                    } else if (kvp.first.is_one_of("level"_frag, "lvl"_frag)) {
85✔
2231
                        sa.emplace_back(value_lr, L_LEVEL.value());
16✔
2232
                        known_field = true;
16✔
2233
                    } else if (kvp.first.is_one_of("msg"_frag,
69✔
2234
                                                   "message"_frag)) {
2235
                        sa.emplace_back(value_lr, SA_BODY.value());
16✔
2236
                        found_body += 1;
16✔
2237
                    } else if (kvp.second.is<logfmt::parser::quoted_value>()
53✔
2238
                               || kvp.second
102✔
2239
                                      .is<logfmt::parser::unquoted_value>())
49✔
2240
                    {
2241
                        auto vkind = value_frag.startswith("\"")
53✔
2242
                            ? value_kind_t::VALUE_JSON
53✔
2243
                            : value_kind_t::VALUE_TEXT;
53✔
2244
                        auto lvm = logline_value_meta{
2245
                            intern_string::lookup(kvp.first),
53✔
2246
                            vkind,
2247
                            logline_value_meta::table_column{0},
×
2248
                            (log_format*) this,
×
2249
                        };
53✔
2250
                        lvm.with_struct_name(FIELDS_NAME);
53✔
2251
                        values.lvv_values.emplace_back(lvm, value_frag);
53✔
2252
                        values.lvv_values.back().lv_origin = value_lr;
53✔
2253
                    }
53✔
2254
                    if (known_field) {
101✔
2255
                        auto key_with_eq = kvp.first;
32✔
2256
                        key_with_eq.sf_end += 1;
32✔
2257
                        sa.emplace_back(to_line_range(key_with_eq),
32✔
2258
                                        SA_REPLACED.value());
64✔
2259
                    } else {
2260
                        sa.emplace_back(to_line_range(kvp.first),
69✔
2261
                                        VC_ROLE.value(role_t::VCR_OBJECT_KEY));
138✔
2262
                    }
2263
                    return false;
101✔
2264
                },
2265
                [line_number, &sbr](const logfmt::parser::error& err) {
117✔
2266
                    log_error(
×
2267
                        "bad line %.*s", (int) sbr.length(), sbr.get_data());
2268
                    log_error("%lld:logfmt parse error: %s",
×
2269
                              line_number,
2270
                              err.e_msg.c_str());
2271
                    return true;
×
2272
                });
2273
        }
117✔
2274

2275
        if (found_body == 1) {
16✔
2276
            sa.emplace_back(line_range::empty_at(sbr.length()),
16✔
2277
                            SA_BODY.value());
32✔
2278
        }
2279

2280
        log_format::annotate(lf, line_number, sa, values);
16✔
2281
    }
16✔
2282

2283
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
2284
    {
2285
        auto retval = std::make_shared<logfmt_format>(*this);
6✔
2286

2287
        retval->lf_specialized = true;
6✔
2288
        return retval;
12✔
2289
    }
6✔
2290
};
2291

2292
// Binds the five format classes below as log_format implementations with the
// injector.
// NOTE(review): the order of the add<>() calls presumably determines the
// order in which the formats are tried -- confirm before reordering.
static auto format_binder = injector::bind_multiple<log_format>()
2293
                                .add<logfmt_format>()
2294
                                .add<bro_log_format>()
2295
                                .add<w3c_log_format>()
2296
                                .add<generic_log_format>()
2297
                                .add<piper_log_format>();
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc