• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 18885671937-2611

28 Oct 2025 06:41PM UTC coverage: 68.891% (+0.001%) from 68.89%
18885671937-2611

push

github

tstack
[time_util] use std::chrono::microseconds instead of timeval

115 of 163 new or added lines in 15 files covered. (70.55%)

5 existing lines in 4 files now uncovered.

50216 of 72892 relevant lines covered (68.89%)

424384.78 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.72
/src/log_format_impls.cc
1
/**
2
 * Copyright (c) 2007-2017, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file log_format_impls.cc
30
 */
31

32
#include <algorithm>
33
#include <chrono>
34
#include <memory>
35
#include <utility>
36

37
#include "log_format.hh"
38

39
#include <stdio.h>
40

41
#include "base/injector.bind.hh"
42
#include "base/opt_util.hh"
43
#include "base/string_attr_type.hh"
44
#include "config.h"
45
#include "formats/logfmt/logfmt.parser.hh"
46
#include "log_vtab_impl.hh"
47
#include "ptimec.hh"
48
#include "scn/scan.h"
49
#include "sql_util.hh"
50
#include "yajlpp/yajlpp.hh"
51

52
class piper_log_format : public log_format {
53
public:
54
    const intern_string_t get_name() const override
13,459✔
55
    {
56
        static const intern_string_t RETVAL
57
            = intern_string::lookup("lnav_piper_log");
14,901✔
58

59
        return RETVAL;
13,459✔
60
    }
61

62
    scan_result_t scan(logfile& lf,
10,803✔
63
                       std::vector<logline>& dst,
64
                       const line_info& li,
65
                       shared_buffer_ref& sbr,
66
                       scan_batch_context& sbc) override
67
    {
68
        if (lf.has_line_metadata()
10,803✔
69
            && lf.get_text_format() == text_format_t::TF_LOG)
10,803✔
70
        {
71
            dst.emplace_back(
293✔
72
                li.li_file_range.fr_offset, li.li_timestamp, li.li_level);
293✔
73
            return scan_match{1};
293✔
74
        }
75

76
        return scan_no_match{"not a piper capture"};
10,510✔
77
    }
78

79
    static constexpr int TIMESTAMP_SIZE = 28;
80

81
    void annotate(logfile* lf,
41✔
82
                  uint64_t line_number,
83
                  string_attrs_t& sa,
84
                  logline_value_vector& values,
85
                  bool annotate_module) const override
86
    {
87
        auto lr = line_range{0, TIMESTAMP_SIZE};
41✔
88
        sa.emplace_back(lr, L_TIMESTAMP.value());
41✔
89
        log_format::annotate(lf, line_number, sa, values, annotate_module);
41✔
90
    }
41✔
91

92
    void get_subline(const logline& ll,
317✔
93
                     shared_buffer_ref& sbr,
94
                     subline_options opts) override
95
    {
96
        this->plf_cached_line.resize(TIMESTAMP_SIZE);
317✔
97
        auto tlen = sql_strftime(this->plf_cached_line.data(),
317✔
98
                                 this->plf_cached_line.size(),
99
                                 ll.get_timeval(),
317✔
100
                                 'T');
101
        this->plf_cached_line.resize(tlen);
317✔
102
        {
103
            char zone_str[16];
104
            exttm tmptm;
317✔
105

106
            tmptm.et_flags |= ETF_ZONE_SET;
317✔
107
            tmptm.et_gmtoff
108
                = lnav::local_time_to_info(
634✔
109
                      date::local_seconds{ll.get_time<std::chrono::seconds>()})
317✔
110
                      .first.offset.count();
317✔
111
            off_t zone_len = 0;
317✔
112
            ftime_z(zone_str, zone_len, sizeof(zone_str), tmptm);
317✔
113
            for (off_t lpc = 0; lpc < zone_len; lpc++) {
1,902✔
114
                this->plf_cached_line.push_back(zone_str[lpc]);
1,585✔
115
            }
116
        }
117
        this->plf_cached_line.push_back(' ');
317✔
118
        const auto prefix_len = this->plf_cached_line.size();
317✔
119
        this->plf_cached_line.resize(this->plf_cached_line.size()
634✔
120
                                     + sbr.length());
317✔
121
        memcpy(
317✔
122
            &this->plf_cached_line[prefix_len], sbr.get_data(), sbr.length());
317✔
123

124
        sbr.share(this->plf_share_manager,
634✔
125
                  this->plf_cached_line.data(),
317✔
126
                  this->plf_cached_line.size());
127
    }
317✔
128

129
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
130
    {
131
        auto retval = std::make_shared<piper_log_format>(*this);
6✔
132

133
        retval->lf_specialized = true;
6✔
134
        retval->lf_timestamp_flags |= ETF_ZONE_SET | ETF_MICROS_SET;
6✔
135
        return retval;
12✔
136
    }
6✔
137

138
private:
139
    shared_buffer plf_share_manager;
140
    std::vector<char> plf_cached_line;
141
};
142

143
class generic_log_format : public log_format {
144
public:
145
    static const pcre_format* get_pcre_log_formats()
10,852✔
146
    {
147
        static const pcre_format log_fmt[] = {
148
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24}))"),
149
            pcre_format(
150
                R"(^(?:\*\*\*\s+)?(?<timestamp>(?:\s|\d{4}[\-\/]\d{2}[\-\/]\d{2}|T|\d{1,2}:\d{2}(?::\d{2}(?:[\.,]\d{1,6})?)?|Z|[+\-]\d{2}:?\d{2}|(?!DBG|DEBUG|ERR|INFO|WARN|NONE)[A-Z]{3,4})+)[:|\s]?(trc|trace|dbg|debug|info|warn(?:ing)?|err(?:or)?)[:|\s]\s*)"),
151
            pcre_format(
152
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) \[(trace|debug|info|warn(?:ing)?|error|critical)\]\s+)"),
153
            pcre_format(
154
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) -- (trace|debug|info|warn(?:ing)?|error|critical) --\s+)"),
155

156
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+/\.-]+) \[\w\s+)"),
157
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+)\s+)"),
158
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+) -\s+)"),
159
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+) -\s+)"),
160
            pcre_format(
161
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\] \[(trace|debug|info|warn(?:ing)?|error|critical)\]\s+)"),
162
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: "
163
                        "\\.,/-]+)\\[[^\\]]+\\]\\s+"),
164
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+)\s+)"),
165

166
            pcre_format(
167
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s*(\w+):?\s+)"),
168
            pcre_format(
169
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s+)"),
170
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: "
171
                        "\\.,+/-]+)\\] \\w+\\s+"),
172
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: ,+/-]+)\\] "
173
                        "\\(\\d+\\)\\s+"),
174

175
            pcre_format(),
176
        };
10,852✔
177

178
        return log_fmt;
10,852✔
179
    }
180

181
    std::string get_pattern_regex(uint64_t line_number) const override
×
182
    {
183
        int pat_index = this->pattern_index_for_line(line_number);
×
184
        return get_pcre_log_formats()[pat_index].name;
×
185
    }
186

187
    const intern_string_t get_name() const override
13,108✔
188
    {
189
        static const intern_string_t RETVAL
190
            = intern_string::lookup("generic_log");
14,550✔
191

192
        return RETVAL;
13,108✔
193
    }
194

195
    scan_result_t scan(logfile& lf,
10,766✔
196
                       std::vector<logline>& dst,
197
                       const line_info& li,
198
                       shared_buffer_ref& sbr,
199
                       scan_batch_context& sbc) override
200
    {
201
        exttm log_time;
10,766✔
202
        timeval log_tv;
203
        string_fragment ts;
10,766✔
204
        std::optional<string_fragment> level;
10,766✔
205
        const char* last_pos;
206

207
        if (dst.empty()) {
10,766✔
208
            auto file_options = lf.get_file_options();
192✔
209

210
            if (file_options) {
192✔
211
                this->lf_date_time.dts_default_zone
212
                    = file_options->second.fo_default_zone.pp_value;
2✔
213
            } else {
214
                this->lf_date_time.dts_default_zone = nullptr;
190✔
215
            }
216
        }
192✔
217

218
        if ((last_pos = this->log_scanf(dst.size(),
10,766✔
219
                                        sbr.to_string_fragment(),
220
                                        get_pcre_log_formats(),
221
                                        nullptr,
222
                                        &log_time,
223
                                        &log_tv,
224

225
                                        &ts,
226
                                        &level))
227
            != nullptr)
10,766✔
228
        {
229
            auto level_val = log_level_t::LEVEL_UNKNOWN;
1,525✔
230
            if (level) {
1,525✔
231
                level_val = string2level(level->data(), level->length());
1,525✔
232
            }
233

234
            if (!((log_time.et_flags & ETF_DAY_SET)
1,525✔
235
                  && (log_time.et_flags & ETF_MONTH_SET)
1,450✔
236
                  && (log_time.et_flags & ETF_YEAR_SET)))
1,450✔
237
            {
238
                this->check_for_new_year(dst, log_time, log_tv);
686✔
239
            }
240

241
            if (!(this->lf_timestamp_flags
3,050✔
242
                  & (ETF_MILLIS_SET | ETF_MICROS_SET | ETF_NANOS_SET))
1,525✔
243
                && !dst.empty()
1,200✔
244
                && dst.back().get_time<std::chrono::seconds>().count()
1,198✔
245
                    == log_tv.tv_sec
1,198✔
246
                && dst.back()
3,587✔
247
                        .get_subsecond_time<std::chrono::microseconds>()
2,387✔
248
                        .count()
862✔
249
                    != 0)
250
            {
251
                auto log_ms
252
                    = dst.back()
×
253
                          .get_subsecond_time<std::chrono::microseconds>();
×
254

255
                log_time.et_nsec
256
                    = std::chrono::duration_cast<std::chrono::nanoseconds>(
×
257
                          log_ms)
258
                          .count();
×
259
                log_tv.tv_usec
260
                    = std::chrono::duration_cast<std::chrono::microseconds>(
×
261
                          log_ms)
262
                          .count();
×
263
            }
264

265
            auto log_us = to_us(log_tv);
1,525✔
266
            auto tid_iter = sbc.sbc_tids.insert_tid(
1,525✔
NEW
267
                sbc.sbc_allocator, string_fragment{}, log_us);
×
268
            tid_iter->second.titr_level_stats.update_msg_count(level_val);
1,525✔
269
            dst.emplace_back(li.li_file_range.fr_offset, log_us, level_val);
1,525✔
270
            return scan_match{5};
1,525✔
271
        }
272

273
        return scan_no_match{"no patterns matched"};
9,241✔
274
    }
275

276
    void annotate(logfile* lf,
86✔
277
                  uint64_t line_number,
278
                  string_attrs_t& sa,
279
                  logline_value_vector& values,
280
                  bool annotate_module) const override
281
    {
282
        thread_local auto md = lnav::pcre2pp::match_data::unitialized();
86✔
283
        auto& line = values.lvv_sbr;
86✔
284
        int pat_index = this->pattern_index_for_line(line_number);
86✔
285
        const auto& fmt = get_pcre_log_formats()[pat_index];
86✔
286
        int prefix_len = 0;
86✔
287
        const auto line_sf = line.to_string_fragment();
86✔
288
        auto match_res = fmt.pcre->capture_from(line_sf)
86✔
289
                             .into(md)
86✔
290
                             .matches(PCRE2_NO_UTF_CHECK)
172✔
291
                             .ignore_error();
86✔
292
        if (!match_res) {
86✔
293
            return;
7✔
294
        }
295

296
        auto ts_cap = md[fmt.pf_timestamp_index].value();
79✔
297
        auto lr = to_line_range(ts_cap.trim());
79✔
298
        sa.emplace_back(lr, L_TIMESTAMP.value());
79✔
299

300
        values.lvv_values.emplace_back(TS_META, line, lr);
79✔
301
        values.lvv_values.back().lv_meta.lvm_format = (log_format*) this;
79✔
302

303
        prefix_len = md[0]->sf_end;
79✔
304
        auto level_cap = md[2];
79✔
305
        if (level_cap) {
79✔
306
            if (string2level(level_cap->data(), level_cap->length(), true)
73✔
307
                != LEVEL_UNKNOWN)
73✔
308
            {
309
                values.lvv_values.emplace_back(
73✔
310
                    LEVEL_META, line, to_line_range(level_cap->trim()));
73✔
311
                values.lvv_values.back().lv_meta.lvm_format
73✔
312
                    = (log_format*) this;
73✔
313

314
                lr = to_line_range(level_cap->trim());
73✔
315
                if (lr.lr_end != (ssize_t) line.length()) {
73✔
316
                    sa.emplace_back(lr, L_LEVEL.value());
73✔
317
                }
318
            }
319
        }
320

321
        lr.lr_start = 0;
79✔
322
        lr.lr_end = prefix_len;
79✔
323
        sa.emplace_back(lr, L_PREFIX.value());
79✔
324

325
        lr.lr_start = prefix_len;
79✔
326
        lr.lr_end = line.length();
79✔
327
        sa.emplace_back(lr, SA_BODY.value());
79✔
328

329
        log_format::annotate(lf, line_number, sa, values, annotate_module);
79✔
330
    }
331

332
    std::shared_ptr<log_format> specialized(int fmt_lock) override
50✔
333
    {
334
        auto retval = std::make_shared<generic_log_format>(*this);
50✔
335

336
        retval->lf_specialized = true;
50✔
337
        return retval;
100✔
338
    }
50✔
339

340
    bool hide_field(const intern_string_t field_name, bool val) override
2✔
341
    {
342
        if (field_name == TS_META.lvm_name) {
2✔
343
            TS_META.lvm_user_hidden = val;
1✔
344
            return true;
1✔
345
        }
346
        if (field_name == LEVEL_META.lvm_name) {
1✔
347
            LEVEL_META.lvm_user_hidden = val;
1✔
348
            return true;
1✔
349
        }
350
        if (field_name == OPID_META.lvm_name) {
×
351
            OPID_META.lvm_user_hidden = val;
×
352
            return true;
×
353
        }
354
        return false;
×
355
    }
356

357
    std::map<intern_string_t, logline_value_meta> get_field_states() override
125✔
358
    {
359
        return {
360
            {TS_META.lvm_name, TS_META},
361
            {LEVEL_META.lvm_name, LEVEL_META},
362
            {OPID_META.lvm_name, OPID_META},
363
        };
625✔
364
    }
125✔
365

366
private:
367
    static logline_value_meta TS_META;
368
    static logline_value_meta LEVEL_META;
369
    static logline_value_meta OPID_META;
370
};
371

372
// Metadata for the "log_time" text field of generic_log_format; bound to
// table column 2.  annotate() attaches it to the timestamp capture.
logline_value_meta generic_log_format::TS_META{
373
    intern_string::lookup("log_time"),
374
    value_kind_t::VALUE_TEXT,
375
    logline_value_meta::table_column{2},
376
};
377

378
// Metadata for the "log_level" text field of generic_log_format; bound to
// table column 3.  annotate() attaches it to a recognized level capture.
logline_value_meta generic_log_format::LEVEL_META{
379
    intern_string::lookup("log_level"),
380
    value_kind_t::VALUE_TEXT,
381
    logline_value_meta::table_column{3},
382
};
383

384
// Metadata for the "log_opid" text field of generic_log_format; declared as
// an internal column rather than a numbered table column.
logline_value_meta generic_log_format::OPID_META{
385
    intern_string::lookup("log_opid"),
386
    value_kind_t::VALUE_TEXT,
387
    logline_value_meta::internal_column{},
388
};
389

390
/**
 * Decode the "\xH" / "\xHH" hexadecimal escapes in a length-delimited
 * string.
 *
 * A backslash that does not start a recognized escape is dropped and the
 * bytes after it are copied through unchanged.  An escape is only recognized
 * when at least three bytes follow the backslash.  Decoding never reads at
 * or beyond str[len], so the input does not need to be NUL-terminated.
 *
 * @param str The bytes to decode.
 * @param len The number of bytes available in @p str.
 * @return The decoded string.
 */
std::string
from_escaped_string(const char* str, size_t len)
{
    // Value of a single hexadecimal digit, or -1 if ch is not one.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    std::string retval;

    retval.reserve(len);
    for (size_t lpc = 0; lpc < len; lpc++) {
        if (str[lpc] != '\\') {
            retval.push_back(str[lpc]);
            continue;
        }

        if ((lpc + 3) < len && str[lpc + 1] == 'x') {
            const int hi = hex_value(str[lpc + 2]);

            if (hi >= 0) {
                const int lo = hex_value(str[lpc + 3]);

                if (lo >= 0) {
                    retval.push_back(static_cast<char>((hi << 4) | lo));
                    lpc += 3;
                } else {
                    // Single-digit escape: consume only the digit so the
                    // byte after it is not swallowed.
                    retval.push_back(static_cast<char>(hi));
                    lpc += 2;
                }
            }
        }
    }

    return retval;
}
×
415

416
/**
 * Find the first occurrence of the NUL-terminated string @p find within the
 * first @p slen bytes of @p s.  The search also stops at a NUL byte in @p s.
 *
 * @param s    The buffer to search.
 * @param find The NUL-terminated string to look for.
 * @param slen The maximum number of bytes of @p s to examine.
 * @return A pointer to the start of the match, @p s itself when @p find is
 * empty, or std::nullopt when there is no match.
 */
std::optional<const char*>
lnav_strnstr(const char* s, const char* find, size_t slen)
{
    const char first = *find;

    if (first == '\0') {
        return s;
    }

    const char* const rest = find + 1;
    const size_t rest_len = strlen(rest);

    for (;;) {
        // Advance to just past the next occurrence of the first character.
        char cur;

        do {
            if (slen < 1 || *s == '\0') {
                return std::nullopt;
            }
            cur = *s;
            --slen;
            ++s;
        } while (cur != first);

        // The remainder of the needle must fit in what is left.
        if (rest_len > slen) {
            return std::nullopt;
        }
        if (strncmp(s, rest, rest_len) == 0) {
            return s - 1;
        }
    }
}
440

441
struct separated_string {
442
    const char* ss_str;
443
    size_t ss_len;
444
    const char* ss_separator;
445
    size_t ss_separator_len;
446

447
    separated_string(const char* str, size_t len)
34,127✔
448
        : ss_str(str), ss_len(len), ss_separator(","),
34,127✔
449
          ss_separator_len(strlen(this->ss_separator))
34,127✔
450
    {
451
    }
34,127✔
452

453
    separated_string& with_separator(const char* sep)
34,127✔
454
    {
455
        this->ss_separator = sep;
34,127✔
456
        this->ss_separator_len = strlen(sep);
34,127✔
457
        return *this;
34,127✔
458
    }
459

460
    struct iterator {
461
        const separated_string& i_parent;
462
        const char* i_pos;
463
        const char* i_next_pos;
464
        size_t i_index;
465

466
        iterator(const separated_string& ss, const char* pos)
823,037✔
467
            : i_parent(ss), i_pos(pos), i_next_pos(pos), i_index(0)
823,037✔
468
        {
469
            this->update();
823,037✔
470
        }
823,037✔
471

472
        void update()
1,577,974✔
473
        {
474
            const separated_string& ss = this->i_parent;
1,577,974✔
475
            auto next_field
476
                = lnav_strnstr(this->i_pos,
1,577,974✔
477
                               ss.ss_separator,
1,577,974✔
478
                               ss.ss_len - (this->i_pos - ss.ss_str));
1,577,974✔
479
            if (next_field) {
1,577,974✔
480
                this->i_next_pos = next_field.value() + ss.ss_separator_len;
721,168✔
481
            } else {
482
                this->i_next_pos = ss.ss_str + ss.ss_len;
856,806✔
483
            }
484
        }
1,577,974✔
485

486
        iterator& operator++()
754,937✔
487
        {
488
            this->i_pos = this->i_next_pos;
754,937✔
489
            this->update();
754,937✔
490
            this->i_index += 1;
754,937✔
491

492
            return *this;
754,937✔
493
        }
494

495
        string_fragment operator*()
678,319✔
496
        {
497
            const auto& ss = this->i_parent;
678,319✔
498
            int end;
499

500
            if (this->i_next_pos < (ss.ss_str + ss.ss_len)) {
678,319✔
501
                end = this->i_next_pos - ss.ss_str - ss.ss_separator_len;
648,586✔
502
            } else {
503
                end = this->i_next_pos - ss.ss_str;
29,733✔
504
            }
505
            return string_fragment::from_byte_range(
678,319✔
506
                ss.ss_str, this->i_pos - ss.ss_str, end);
678,319✔
507
        }
508

509
        bool operator==(const iterator& other) const
788,910✔
510
        {
511
            return (&this->i_parent == &other.i_parent)
788,910✔
512
                && (this->i_pos == other.i_pos);
788,910✔
513
        }
514

515
        bool operator!=(const iterator& other) const
788,756✔
516
        {
517
            return !(*this == other);
788,756✔
518
        }
519

520
        size_t index() const { return this->i_index; }
1,626,672✔
521
    };
522

523
    iterator begin() { return {*this, this->ss_str}; }
34,127✔
524

525
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
788,910✔
526
};
527

528
class bro_log_format : public log_format {
529
public:
530
    static const intern_string_t TS;
531
    static const intern_string_t DURATION;
532
    struct field_def {
533
        logline_value_meta fd_meta;
534
        logline_value_meta* fd_root_meta;
535
        std::string fd_collator;
536
        std::optional<size_t> fd_numeric_index;
537

538
        explicit field_def(const intern_string_t name,
622✔
539
                           size_t col,
540
                           log_format* format)
541
            : fd_meta(name,
1,244✔
542
                      value_kind_t::VALUE_TEXT,
543
                      logline_value_meta::table_column{col},
622✔
544
                      format),
545
              fd_root_meta(&FIELD_META.find(name)->second)
622✔
546
        {
547
        }
622✔
548

549
        field_def& with_kind(value_kind_t kind,
458✔
550
                             bool identifier = false,
551
                             bool foreign_key = false,
552
                             const std::string& collator = "")
553
        {
554
            this->fd_meta.lvm_kind = kind;
458✔
555
            this->fd_meta.lvm_identifier = identifier;
458✔
556
            this->fd_meta.lvm_foreign_key = foreign_key;
458✔
557
            this->fd_collator = collator;
458✔
558
            return *this;
458✔
559
        }
560

561
        field_def& with_numeric_index(size_t index)
116✔
562
        {
563
            this->fd_numeric_index = index;
116✔
564
            return *this;
116✔
565
        }
566
    };
567

568
    static std::unordered_map<const intern_string_t, logline_value_meta>
569
        FIELD_META;
570

571
    /** @return The interned name, "std", of the opid description. */
    static const intern_string_t get_opid_desc()
    {
        static const intern_string_t STD_DESC_NAME
            = intern_string::lookup("std");

        return STD_DESC_NAME;
    }
577

578
    bro_log_format()
727✔
579
    {
727✔
580
        this->lf_structured = true;
727✔
581
        this->lf_is_self_describing = true;
727✔
582
        this->lf_time_ordered = false;
727✔
583
        this->lf_timestamp_point_of_reference
584
            = timestamp_point_of_reference_t::start;
727✔
585

586
        auto desc_v = std::make_shared<std::vector<opid_descriptor>>();
727✔
587
        desc_v->emplace({});
727✔
588
        this->lf_opid_description_def->emplace(get_opid_desc(),
1,454✔
589
                                               opid_descriptors{desc_v});
1,454✔
590
    }
727✔
591

592
    /**
     * @return The name recorded in blf_format_name when it is set, otherwise
     * the fallback name "bro".
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t name(intern_string::lookup("bro"));

        if (!this->blf_format_name.empty()) {
            return this->blf_format_name;
        }
        return name;
    }
598

599
    void clear() override
10,825✔
600
    {
601
        this->log_format::clear();
10,825✔
602
        this->blf_format_name.clear();
10,825✔
603
        this->blf_field_defs.clear();
10,825✔
604
    }
10,825✔
605

606
    /**
     * Scan one data line using the column definitions in blf_field_defs.
     *
     * The line is split on blf_separator and each field is matched to its
     * column by position.  The timestamp, status code, uid, originating
     * host and duration columns get special handling; numeric columns feed
     * lf_value_stats.
     *
     * @param dst Receives the new logline when a timestamp was parsed.
     * @param li  Supplies the file offset for the new logline.
     * @param sbr The bytes of the line.
     * @param sbc Batch context whose opid table is updated.
     * @return scan_match{2000} for a "#close" line or a line with a
     * timestamp, otherwise scan_no_match.
     */
    scan_result_t scan_int(std::vector<logline>& dst,
                           const line_info& li,
                           shared_buffer_ref& sbr,
                           scan_batch_context& sbc)
    {
        static const intern_string_t STATUS_CODE
            = intern_string::lookup("bro_status_code");
        static const intern_string_t UID = intern_string::lookup("bro_uid");
        static const intern_string_t ID_ORIG_H
            = intern_string::lookup("bro_id_orig_h");

        separated_string ss(sbr.get_data(), sbr.length());
        timeval tv;
        exttm tm;
        auto found_ts = false;
        log_level_t level = LEVEL_INFO;
        uint16_t opid = 0;
        auto opid_cap = string_fragment::invalid();
        auto host_cap = string_fragment::invalid();
        auto duration = std::chrono::microseconds{0};

        ss.with_separator(this->blf_separator.get());

        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
            if (iter.index() == 0 && *iter == "#close") {
                return scan_match{2000};
            }

            // Ignore any fields beyond the declared columns.
            if (iter.index() >= this->blf_field_defs.size()) {
                break;
            }

            const auto& fd = this->blf_field_defs[iter.index()];

            if (TS == fd.fd_meta.lvm_name) {
                static const char* const TIME_FMT[] = {"%s.%f"};
                const auto sf = *iter;

                if (this->lf_date_time.scan(
                        sf.data(), sf.length(), TIME_FMT, &tm, tv))
                {
                    this->lf_timestamp_flags = tm.et_flags;
                    found_ts = true;
                }
            } else if (STATUS_CODE == fd.fd_meta.lvm_name) {
                const auto sf = *iter;

                // A status whose first character is '4' or above is an error.
                if (!sf.empty() && sf[0] >= '4') {
                    level = LEVEL_ERROR;
                }
            } else if (UID == fd.fd_meta.lvm_name) {
                opid_cap = *iter;

                opid = opid_cap.hash();
            } else if (ID_ORIG_H == fd.fd_meta.lvm_name) {
                host_cap = *iter;
            } else if (DURATION == fd.fd_meta.lvm_name) {
                const auto sf = *iter;
                // Parse the field itself as a number of seconds.  (Passing
                // "{}" first would make the format string the scan source.)
                auto scan_res = scn::scan_value<double>(sf.to_string_view());
                if (scan_res) {
                    duration = std::chrono::microseconds{
                        static_cast<long long>(scan_res->value() * 1000000)};
                }
            }

            if (fd.fd_numeric_index) {
                switch (fd.fd_meta.lvm_kind) {
                    case value_kind_t::VALUE_INTEGER:
                    case value_kind_t::VALUE_FLOAT: {
                        const auto sv = (*iter).to_string_view();
                        auto scan_float_res = scn::scan_value<double>(sv);
                        if (scan_float_res) {
                            this->lf_value_stats[fd.fd_numeric_index.value()]
                                .add_value(scan_float_res->value());
                        }
                        break;
                    }
                    default:
                        break;
                }
            }
        }

        if (found_ts) {
            // Until the format is specialized, the lines already in dst are
            // flagged as ignored.
            if (!this->lf_specialized) {
                for (auto& ll : dst) {
                    ll.set_ignore(true);
                }
            }

            auto log_us = to_us(tv);
            if (opid_cap.is_valid()) {
                auto opid_iter = sbc.sbc_opids.insert_op(
                    sbc.sbc_allocator,
                    opid_cap,
                    log_us,
                    this->lf_timestamp_point_of_reference,
                    duration);
                opid_iter->second.otr_level_stats.update_msg_count(level);

                // Describe the operation by its originating host, once.
                auto& otr = opid_iter->second;
                if (!otr.otr_description.lod_id && host_cap.is_valid()
                    && otr.otr_description.lod_elements.empty())
                {
                    otr.otr_description.lod_id = get_opid_desc();
                    otr.otr_description.lod_elements.emplace_back(
                        0, host_cap.to_string());
                }
            }
            dst.emplace_back(
                li.li_file_range.fr_offset, log_us, level, 0, opid);
            dst.back().set_opid(opid);
            return scan_match{2000};
        }
        return scan_no_match{"no header found"};
    }
722

723
    scan_result_t scan(logfile& lf,
10,803✔
724
                       std::vector<logline>& dst,
725
                       const line_info& li,
726
                       shared_buffer_ref& sbr,
727
                       scan_batch_context& sbc) override
728
    {
729
        static const auto SEP_RE
730
            = lnav::pcre2pp::code::from_const(R"(^#separator\s+(.+))");
10,803✔
731

732
        if (dst.empty()) {
10,803✔
733
            auto file_options = lf.get_file_options();
1,111✔
734

735
            if (file_options) {
1,111✔
736
                this->lf_date_time.dts_default_zone
737
                    = file_options->second.fo_default_zone.pp_value;
57✔
738
            } else {
739
                this->lf_date_time.dts_default_zone = nullptr;
1,054✔
740
            }
741
        }
1,111✔
742

743
        if (!this->blf_format_name.empty()) {
10,803✔
744
            return this->scan_int(dst, li, sbr, sbc);
4,146✔
745
        }
746

747
        if (dst.empty() || dst.size() > 20 || sbr.empty()
12,203✔
748
            || sbr.get_data()[0] == '#')
12,203✔
749
        {
750
            return scan_no_match{"no header found"};
4,185✔
751
        }
752

753
        auto line_iter = dst.begin();
2,472✔
754
        auto read_result = lf.read_line(line_iter);
2,472✔
755

756
        if (read_result.isErr()) {
2,472✔
757
            return scan_no_match{"unable to read first line"};
×
758
        }
759

760
        auto line = read_result.unwrap();
2,472✔
761
        auto md = SEP_RE.create_match_data();
2,472✔
762

763
        auto match_res = SEP_RE.capture_from(line.to_string_fragment())
2,472✔
764
                             .into(md)
2,472✔
765
                             .matches(PCRE2_NO_UTF_CHECK)
4,944✔
766
                             .ignore_error();
2,472✔
767
        if (!match_res) {
2,472✔
768
            return scan_no_match{"cannot read separator header"};
2,450✔
769
        }
770

771
        this->clear();
22✔
772

773
        auto sep = from_escaped_string(md[1]->data(), md[1]->length());
22✔
774
        this->blf_separator = intern_string::lookup(sep);
22✔
775

776
        for (++line_iter; line_iter != dst.end(); ++line_iter) {
176✔
777
            auto next_read_result = lf.read_line(line_iter);
154✔
778

779
            if (next_read_result.isErr()) {
154✔
780
                return scan_no_match{"unable to read header line"};
×
781
            }
782

783
            line = next_read_result.unwrap();
154✔
784
            separated_string ss(line.get_data(), line.length());
154✔
785

786
            ss.with_separator(this->blf_separator.get());
154✔
787
            auto iter = ss.begin();
154✔
788

789
            string_fragment directive = *iter;
154✔
790

791
            if (directive.empty() || directive[0] != '#') {
154✔
792
                continue;
×
793
            }
794

795
            ++iter;
154✔
796
            if (iter == ss.end()) {
154✔
797
                continue;
×
798
            }
799

800
            if (directive == "#set_separator") {
154✔
801
                this->blf_set_separator = intern_string::lookup(*iter);
22✔
802
            } else if (directive == "#empty_field") {
132✔
803
                this->blf_empty_field = intern_string::lookup(*iter);
22✔
804
            } else if (directive == "#unset_field") {
110✔
805
                this->blf_unset_field = intern_string::lookup(*iter);
22✔
806
            } else if (directive == "#path") {
88✔
807
                auto full_name = fmt::format(FMT_STRING("bro_{}_log"), *iter);
66✔
808
                this->blf_format_name = intern_string::lookup(full_name);
22✔
809
            } else if (directive == "#fields" && this->blf_field_defs.empty()) {
88✔
810
                do {
811
                    auto field_name
812
                        = intern_string::lookup("bro_" + sql_safe_ident(*iter));
622✔
813
                    auto common_iter = FIELD_META.find(field_name);
622✔
814
                    if (common_iter == FIELD_META.end()) {
622✔
815
                        FIELD_META.emplace(field_name,
616✔
816
                                           logline_value_meta{
1,232✔
817
                                               field_name,
818
                                               value_kind_t::VALUE_TEXT,
819
                                           });
820
                    }
821
                    this->blf_field_defs.emplace_back(
1,244✔
822
                        field_name, this->blf_field_defs.size(), this);
622✔
823
                    ++iter;
622✔
824
                } while (iter != ss.end());
622✔
825
            } else if (directive == "#types") {
44✔
826
                static const char* KNOWN_IDS[] = {
827
                    "bro_conn_uids",
828
                    "bro_fuid",
829
                    "bro_host",
830
                    "bro_info_code",
831
                    "bro_method",
832
                    "bro_mime_type",
833
                    "bro_orig_fuids",
834
                    "bro_parent_fuid",
835
                    "bro_proto",
836
                    "bro_referrer",
837
                    "bro_resp_fuids",
838
                    "bro_service",
839
                    "bro_uid",
840
                    "bro_uri",
841
                    "bro_user_agent",
842
                    "bro_username",
843
                };
844
                static const char* KNOWN_FOREIGN[] = {
845
                    "bro_status_code",
846
                };
847

848
                int numeric_count = 0;
22✔
849

850
                do {
851
                    string_fragment field_type = *iter;
622✔
852
                    auto& fd = this->blf_field_defs[iter.index() - 1];
622✔
853

854
                    if (field_type == "time") {
622✔
855
                        fd.with_kind(value_kind_t::VALUE_TIMESTAMP);
44✔
856
                    } else if (field_type == "string") {
600✔
857
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
456✔
858
                                                        std::end(KNOWN_IDS),
859
                                                        fd.fd_meta.lvm_name);
228✔
860
                        fd.with_kind(value_kind_t::VALUE_TEXT, ident);
456✔
861
                    } else if (field_type == "count") {
372✔
862
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
228✔
863
                                                        std::end(KNOWN_IDS),
864
                                                        fd.fd_meta.lvm_name);
114✔
865
                        bool foreign
866
                            = std::binary_search(std::begin(KNOWN_FOREIGN),
228✔
867
                                                 std::end(KNOWN_FOREIGN),
868
                                                 fd.fd_meta.lvm_name);
114✔
869
                        fd.with_kind(
228✔
870
                              value_kind_t::VALUE_INTEGER, ident, foreign)
871
                            .with_numeric_index(numeric_count);
114✔
872
                        numeric_count += 1;
114✔
873
                    } else if (field_type == "bool") {
258✔
874
                        fd.with_kind(value_kind_t::VALUE_BOOLEAN);
8✔
875
                    } else if (field_type == "addr") {
254✔
876
                        fd.with_kind(
88✔
877
                            value_kind_t::VALUE_TEXT, true, false, "ipaddress");
878
                    } else if (field_type == "port") {
210✔
879
                        fd.with_kind(value_kind_t::VALUE_INTEGER, true);
88✔
880
                    } else if (field_type == "interval") {
166✔
881
                        fd.with_kind(value_kind_t::VALUE_FLOAT)
4✔
882
                            .with_numeric_index(numeric_count);
2✔
883
                        numeric_count += 1;
2✔
884
                    }
885

886
                    ++iter;
622✔
887
                } while (iter != ss.end());
622✔
888

889
                this->lf_value_stats.resize(numeric_count);
22✔
890
            }
891
        }
154✔
892

893
        if (!this->blf_format_name.empty() && !this->blf_separator.empty()
44✔
894
            && !this->blf_field_defs.empty())
44✔
895
        {
896
            return this->scan_int(dst, li, sbr, sbc);
22✔
897
        }
898

899
        this->blf_format_name.clear();
×
900
        this->lf_value_stats.clear();
×
901

902
        return scan_no_match{"no header found"};
×
903
    }
2,472✔
904

905
    void annotate(logfile* lf,
29,805✔
906
                  uint64_t line_number,
907
                  string_attrs_t& sa,
908
                  logline_value_vector& values,
909
                  bool annotate_module) const override
910
    {
911
        static const intern_string_t UID = intern_string::lookup("bro_uid");
29,843✔
912

913
        auto& sbr = values.lvv_sbr;
29,805✔
914
        separated_string ss(sbr.get_data(), sbr.length());
29,805✔
915

916
        ss.with_separator(this->blf_separator.get());
29,805✔
917

918
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
664,598✔
919
            if (iter.index() >= this->blf_field_defs.size()) {
634,997✔
920
                return;
204✔
921
            }
922

923
            const field_def& fd = this->blf_field_defs[iter.index()];
634,793✔
924
            string_fragment sf = *iter;
634,793✔
925

926
            if (sf == this->blf_empty_field) {
634,793✔
927
                sf.clear();
29,608✔
928
            } else if (sf == this->blf_unset_field) {
605,185✔
929
                sf.invalidate();
69,122✔
930
            }
931

932
            auto lr = line_range(sf.sf_begin, sf.sf_end);
634,793✔
933

934
            if (fd.fd_meta.lvm_name == TS) {
634,793✔
935
                sa.emplace_back(lr, L_TIMESTAMP.value());
29,805✔
936
            } else if (fd.fd_meta.lvm_name == UID) {
604,988✔
937
                sa.emplace_back(lr, L_OPID.value());
29,805✔
938
                values.lvv_opid_value = sf.to_string();
29,805✔
939
                values.lvv_opid_provenance
940
                    = logline_value_vector::opid_provenance::file;
29,805✔
941
            }
942

943
            if (lr.is_valid()) {
634,793✔
944
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
565,671✔
945
            } else {
946
                values.lvv_values.emplace_back(fd.fd_meta);
69,122✔
947
            }
948
            values.lvv_values.back().lv_meta.lvm_user_hidden
634,793✔
949
                = fd.fd_root_meta->lvm_user_hidden;
634,793✔
950
        }
951

952
        log_format::annotate(lf, line_number, sa, values, annotate_module);
29,601✔
953
    }
954

955
    /**
     * Look up the statistics slot for the field called @p name.
     *
     * @return A pointer into lf_value_stats when the first field with that
     *   name has a numeric index; nullptr when the field is not found or has
     *   no numeric index.
     */
    const logline_value_stats* stats_for_value(
        const intern_string_t& name) const override
    {
        for (const auto& def : this->blf_field_defs) {
            if (!(def.fd_meta.lvm_name == name)) {
                continue;
            }

            // Only the first field with a matching name is considered.
            if (def.fd_numeric_index) {
                return &this->lf_value_stats[def.fd_numeric_index.value()];
            }
            return nullptr;
        }

        return nullptr;
    }
973

974
    /**
     * Set the user-hidden flag on the shared metadata for @p field_name.
     * LOG_TIME_STR is treated as an alias for the "bro_ts" field.
     *
     * @return true when the field exists in FIELD_META and was updated,
     *   false otherwise.
     */
    bool hide_field(intern_string_t field_name, bool val) override
    {
        const intern_string_t meta_key
            = (field_name == LOG_TIME_STR) ? TS : field_name;

        const auto meta_iter = FIELD_META.find(meta_key);
        if (meta_iter != FIELD_META.end()) {
            meta_iter->second.lvm_user_hidden = val;
            return true;
        }

        return false;
    }
989

990
    std::map<intern_string_t, logline_value_meta> get_field_states() override
125✔
991
    {
992
        std::map<intern_string_t, logline_value_meta> retval;
125✔
993

994
        for (const auto& fd : FIELD_META) {
473✔
995
            retval.emplace(fd.first, fd.second);
348✔
996
        }
997

998
        return retval;
125✔
999
    }
×
1000

1001
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
22✔
1002
    {
1003
        auto retval = std::make_shared<bro_log_format>(*this);
22✔
1004

1005
        retval->lf_specialized = true;
22✔
1006
        return retval;
44✔
1007
    }
22✔
1008

1009
    class bro_log_table : public log_format_vtab_impl {
1010
    public:
1011
        explicit bro_log_table(const bro_log_format& format)
20✔
1012
            : log_format_vtab_impl(format), blt_format(format)
20✔
1013
        {
1014
        }
20✔
1015

1016
        void get_columns(std::vector<vtab_column>& cols) const override
29✔
1017
        {
1018
            for (const auto& fd : this->blt_format.blf_field_defs) {
854✔
1019
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
825✔
1020
                    fd.fd_meta.lvm_kind);
825✔
1021

1022
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
825✔
1023
                                  type_pair.first,
1024
                                  fd.fd_collator,
825✔
1025
                                  false,
1,650✔
1026
                                  "",
1027
                                  type_pair.second);
1028
            }
1029
        }
29✔
1030

1031
        void get_foreign_keys(
10✔
1032
            std::unordered_set<std::string>& keys_inout) const override
1033
        {
1034
            this->log_vtab_impl::get_foreign_keys(keys_inout);
10✔
1035

1036
            for (const auto& fd : this->blt_format.blf_field_defs) {
292✔
1037
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
282✔
1038
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
123✔
1039
                }
1040
            }
1041
        }
10✔
1042

1043
        const bro_log_format& blt_format;
1044
    };
1045

1046
    /**
     * @return The function-local static map of tables, keyed by format name.
     */
    static std::map<intern_string_t, std::shared_ptr<bro_log_table>>&
    get_tables()
    {
        static std::map<intern_string_t, std::shared_ptr<bro_log_table>>
            table_registry;

        return table_registry;
    }
1053

1054
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
627✔
1055
    {
1056
        if (this->blf_format_name.empty()) {
627✔
1057
            return nullptr;
607✔
1058
        }
1059

1060
        std::shared_ptr<bro_log_table> retval = nullptr;
20✔
1061

1062
        auto& tables = get_tables();
20✔
1063
        const auto iter = tables.find(this->blf_format_name);
20✔
1064
        if (iter == tables.end()) {
20✔
1065
            retval = std::make_shared<bro_log_table>(*this);
20✔
1066
            tables[this->blf_format_name] = retval;
20✔
1067
        }
1068

1069
        return retval;
20✔
1070
    }
20✔
1071

1072
    void get_subline(const logline& ll,
33,813✔
1073
                     shared_buffer_ref& sbr,
1074
                     subline_options opts) override
1075
    {
1076
    }
33,813✔
1077

1078
    intern_string_t blf_format_name;
1079
    intern_string_t blf_separator;
1080
    intern_string_t blf_set_separator;
1081
    intern_string_t blf_empty_field;
1082
    intern_string_t blf_unset_field;
1083
    std::vector<field_def> blf_field_defs;
1084
};
1085

1086
// Out-of-class definitions of bro_log_format's static members.
//
// FIELD_META maps a field name to its metadata; hide_field() updates
// entries in it and get_field_states() copies it out.
std::unordered_map<const intern_string_t, logline_value_meta>
1087
    bro_log_format::FIELD_META;
1088

1089
// Name of the timestamp field ("bro_ts").
const intern_string_t bro_log_format::TS = intern_string::lookup("bro_ts");
1090
// Interned name "bro_duration".
const intern_string_t bro_log_format::DURATION
1091
    = intern_string::lookup("bro_duration");
1092

1093
struct ws_separated_string {
1094
    const char* ss_str;
1095
    size_t ss_len;
1096

1097
    explicit ws_separated_string(const char* str = nullptr, size_t len = -1)
19,686✔
1098
        : ss_str(str), ss_len(len)
19,686✔
1099
    {
1100
    }
19,686✔
1101

1102
    struct iterator {
1103
        enum class state_t {
1104
            NORMAL,
1105
            QUOTED,
1106
        };
1107

1108
        const ws_separated_string& i_parent;
1109
        const char* i_pos;
1110
        const char* i_next_pos;
1111
        size_t i_index{0};
1112
        state_t i_state{state_t::NORMAL};
1113

1114
        iterator(const ws_separated_string& ss, const char* pos)
30,800✔
1115
            : i_parent(ss), i_pos(pos), i_next_pos(pos)
30,800✔
1116
        {
1117
            this->update();
30,800✔
1118
        }
30,800✔
1119

1120
        void update()
41,366✔
1121
        {
1122
            const auto& ss = this->i_parent;
41,366✔
1123
            bool done = false;
41,366✔
1124

1125
            while (!done && this->i_next_pos < (ss.ss_str + ss.ss_len)) {
336,693✔
1126
                switch (this->i_state) {
295,327✔
1127
                    case state_t::NORMAL:
288,569✔
1128
                        if (*this->i_next_pos == '"') {
288,569✔
1129
                            this->i_state = state_t::QUOTED;
255✔
1130
                        } else if (isspace(*this->i_next_pos)) {
288,314✔
1131
                            done = true;
25,956✔
1132
                        }
1133
                        break;
288,569✔
1134
                    case state_t::QUOTED:
6,758✔
1135
                        if (*this->i_next_pos == '"') {
6,758✔
1136
                            this->i_state = state_t::NORMAL;
255✔
1137
                        }
1138
                        break;
6,758✔
1139
                }
1140
                if (!done) {
295,327✔
1141
                    this->i_next_pos += 1;
269,371✔
1142
                }
1143
            }
1144
        }
41,366✔
1145

1146
        iterator& operator++()
10,566✔
1147
        {
1148
            const auto& ss = this->i_parent;
10,566✔
1149

1150
            this->i_pos = this->i_next_pos;
10,566✔
1151
            while (this->i_pos < (ss.ss_str + ss.ss_len)
10,566✔
1152
                   && isspace(*this->i_pos))
20,590✔
1153
            {
1154
                this->i_pos += 1;
10,024✔
1155
                this->i_next_pos += 1;
10,024✔
1156
            }
1157
            this->update();
10,566✔
1158
            this->i_index += 1;
10,566✔
1159

1160
            return *this;
10,566✔
1161
        }
1162

1163
        string_fragment operator*()
27,184✔
1164
        {
1165
            const auto& ss = this->i_parent;
27,184✔
1166
            int end = this->i_next_pos - ss.ss_str;
27,184✔
1167

1168
            return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end);
27,184✔
1169
        }
1170

1171
        bool operator==(const iterator& other) const
11,114✔
1172
        {
1173
            return (&this->i_parent == &other.i_parent)
11,114✔
1174
                && (this->i_pos == other.i_pos);
11,114✔
1175
        }
1176

1177
        bool operator!=(const iterator& other) const
8,545✔
1178
        {
1179
            return !(*this == other);
8,545✔
1180
        }
1181

1182
        size_t index() const { return this->i_index; }
15,809✔
1183
    };
1184

1185
    iterator begin() { return {*this, this->ss_str}; }
19,686✔
1186

1187
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
11,114✔
1188
};
1189

1190
class w3c_log_format : public log_format {
1191
public:
1192
    static const intern_string_t F_DATE;
1193
    static const intern_string_t F_TIME;
1194

1195
    struct field_def {
1196
        const intern_string_t fd_name;
1197
        logline_value_meta fd_meta;
1198
        logline_value_meta* fd_root_meta{nullptr};
1199
        std::string fd_collator;
1200
        std::optional<size_t> fd_numeric_index;
1201

1202
        explicit field_def(const intern_string_t name)
14✔
1203
            : fd_name(name), fd_meta(intern_string::lookup(sql_safe_ident(
28✔
1204
                                         name.to_string_fragment())),
28✔
1205
                                     value_kind_t::VALUE_TEXT)
14✔
1206
        {
1207
        }
14✔
1208

1209
        field_def(const intern_string_t name, logline_value_meta meta)
59✔
1210
            : fd_name(name), fd_meta(meta)
59✔
1211
        {
1212
        }
59✔
1213

1214
        field_def(size_t col,
9,504✔
1215
                  const char* name,
1216
                  value_kind_t kind,
1217
                  bool ident = false,
1218
                  bool foreign_key = false,
1219
                  std::string coll = "")
1220
            : fd_name(intern_string::lookup(name)),
19,008✔
1221
              fd_meta(
19,008✔
1222
                  intern_string::lookup(sql_safe_ident(string_fragment(name))),
19,008✔
1223
                  kind,
1224
                  logline_value_meta::table_column{col}),
9,504✔
1225
              fd_collator(std::move(coll))
9,504✔
1226
        {
1227
            this->fd_meta.lvm_identifier = ident;
9,504✔
1228
            this->fd_meta.lvm_foreign_key = foreign_key;
9,504✔
1229
        }
9,504✔
1230

1231
        field_def& with_kind(value_kind_t kind,
1232
                             bool identifier = false,
1233
                             const std::string& collator = "")
1234
        {
1235
            this->fd_meta.lvm_kind = kind;
1236
            this->fd_meta.lvm_identifier = identifier;
1237
            this->fd_collator = collator;
1238
            return *this;
1239
        }
1240

1241
        field_def& with_numeric_index(int index)
27✔
1242
        {
1243
            this->fd_numeric_index = index;
27✔
1244
            return *this;
27✔
1245
        }
1246
    };
1247

1248
    static std::unordered_map<const intern_string_t, logline_value_meta>
1249
        FIELD_META;
1250

1251
    struct field_to_struct_t {
1252
        field_to_struct_t(const char* prefix, const char* struct_name)
2,376✔
1253
            : fs_prefix(prefix),
2,376✔
1254
              fs_struct_name(intern_string::lookup(struct_name))
4,752✔
1255
        {
1256
        }
2,376✔
1257

1258
        const char* fs_prefix;
1259
        intern_string_t fs_struct_name;
1260
    };
1261

1262
    static const std::array<field_def, 16>& get_known_fields()
607✔
1263
    {
1264
        static size_t KNOWN_FIELD_INDEX = 0;
1265
        static const std::array<field_def, 16> RETVAL = {
1266
            field_def{
1267
                KNOWN_FIELD_INDEX++,
1268
                "cs-method",
1269
                value_kind_t::VALUE_TEXT,
1270
                true,
1271
            },
1272
            {
1273
                KNOWN_FIELD_INDEX++,
1274
                "c-ip",
1275
                value_kind_t::VALUE_TEXT,
1276
                true,
1277
                false,
1278
                "ipaddress",
1279
            },
1280
            {
1281
                KNOWN_FIELD_INDEX++,
1282
                "cs-bytes",
1283
                value_kind_t::VALUE_INTEGER,
1284
                false,
1285
            },
1286
            {
1287
                KNOWN_FIELD_INDEX++,
1288
                "cs-host",
1289
                value_kind_t::VALUE_TEXT,
1290
                true,
1291
            },
1292
            {
1293
                KNOWN_FIELD_INDEX++,
1294
                "cs-uri-stem",
1295
                value_kind_t::VALUE_TEXT,
1296
                true,
1297
                false,
1298
                "naturalnocase",
1299
            },
1300
            {
1301
                KNOWN_FIELD_INDEX++,
1302
                "cs-uri-query",
1303
                value_kind_t::VALUE_TEXT,
1304
                false,
1305
            },
1306
            {
1307
                KNOWN_FIELD_INDEX++,
1308
                "cs-username",
1309
                value_kind_t::VALUE_TEXT,
1310
                false,
1311
            },
1312
            {
1313
                KNOWN_FIELD_INDEX++,
1314
                "cs-version",
1315
                value_kind_t::VALUE_TEXT,
1316
                true,
1317
            },
1318
            {
1319
                KNOWN_FIELD_INDEX++,
1320
                "s-ip",
1321
                value_kind_t::VALUE_TEXT,
1322
                true,
1323
                false,
1324
                "ipaddress",
1325
            },
1326
            {
1327
                KNOWN_FIELD_INDEX++,
1328
                "s-port",
1329
                value_kind_t::VALUE_INTEGER,
1330
                true,
1331
            },
1332
            {
1333
                KNOWN_FIELD_INDEX++,
1334
                "s-computername",
1335
                value_kind_t::VALUE_TEXT,
1336
                true,
1337
            },
1338
            {
1339
                KNOWN_FIELD_INDEX++,
1340
                "s-sitename",
1341
                value_kind_t::VALUE_TEXT,
1342
                true,
1343
            },
1344
            {
1345
                KNOWN_FIELD_INDEX++,
1346
                "sc-bytes",
1347
                value_kind_t::VALUE_INTEGER,
1348
                false,
1349
            },
1350
            {
1351
                KNOWN_FIELD_INDEX++,
1352
                "sc-status",
1353
                value_kind_t::VALUE_INTEGER,
1354
                false,
1355
                true,
1356
            },
1357
            {
1358
                KNOWN_FIELD_INDEX++,
1359
                "sc-substatus",
1360
                value_kind_t::VALUE_INTEGER,
1361
                false,
1362
            },
1363
            {
1364
                KNOWN_FIELD_INDEX++,
1365
                "time-taken",
1366
                value_kind_t::VALUE_FLOAT,
1367
                false,
1368
            },
1369
        };
1,795✔
1370

1371
        return RETVAL;
607✔
1372
    }
1373

1374
    static const std::array<field_to_struct_t, 4>& get_known_struct_fields()
604✔
1375
    {
1376
        static const std::array<field_to_struct_t, 4> RETVAL = {
1377
            field_to_struct_t{"cs(", "cs_headers"},
1378
            {"sc(", "sc_headers"},
1379
            {"rs(", "rs_headers"},
1380
            {"sr(", "sr_headers"},
1381
        };
604✔
1382

1383
        return RETVAL;
604✔
1384
    }
1385

1386
    w3c_log_format()
727✔
1387
    {
727✔
1388
        this->lf_is_self_describing = true;
727✔
1389
        this->lf_time_ordered = false;
727✔
1390
        this->lf_structured = true;
727✔
1391
    }
727✔
1392

1393
    /**
     * @return wlf_format_name when it is set, otherwise the generic
     *   "w3c_log" name.
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t name(intern_string::lookup("w3c_log"));

        if (this->wlf_format_name.empty()) {
            return name;
        }

        return this->wlf_format_name;
    }
1399

1400
    void clear() override
13,482✔
1401
    {
1402
        this->log_format::clear();
13,482✔
1403
        this->wlf_time_scanner.clear();
13,482✔
1404
        this->wlf_format_name.clear();
13,482✔
1405
        this->wlf_field_defs.clear();
13,482✔
1406
    }
13,482✔
1407

1408
    /**
     * Scan one line of a W3C log using the field definitions that are
     * already loaded in wlf_field_defs.
     *
     * - A column starting with '#' marks a directive line: it is appended to
     *   @p dst with LEVEL_IGNORE and a zero timestamp.  For "#Date:", the
     *   rest of the line is also parsed to set the base time of both
     *   scanners.
     * - Date and time columns are parsed separately and merged; the line is
     *   only accepted when a time column was parsed.
     * - An "sc-status" value whose first character is '4' or greater raises
     *   the level to LEVEL_ERROR; more columns than field definitions sets
     *   LEVEL_INVALID and stops the column scan.
     * - Columns with a numeric index feed lf_value_stats.
     *
     * @return scan_match{2000} for an accepted line or directive, otherwise
     *   scan_no_match.
     */
    scan_result_t scan_int(std::vector<logline>& dst,
                           const line_info& li,
                           shared_buffer_ref& sbr)
    {
        static const intern_string_t F_DATE_LOCAL
            = intern_string::lookup("date-local");
        static const intern_string_t F_DATE_UTC
            = intern_string::lookup("date-UTC");
        static const intern_string_t F_TIME_LOCAL
            = intern_string::lookup("time-local");
        static const intern_string_t F_TIME_UTC
            = intern_string::lookup("time-UTC");
        static const intern_string_t F_STATUS_CODE
            = intern_string::lookup("sc-status");

        ws_separated_string columns(sbr.get_data(), sbr.length());
        timeval date_tv{0, 0}, time_tv{0, 0};
        exttm date_tm, time_tm;
        bool found_date = false, found_time = false;
        log_level_t level = LEVEL_INFO;

        for (auto col = columns.begin(); col != columns.end(); ++col) {
            const auto col_index = col.index();

            if (col_index >= this->wlf_field_defs.size()) {
                level = LEVEL_INVALID;
                break;
            }

            const auto& def = this->wlf_field_defs[col_index];
            string_fragment text = *col;

            if (text.startswith("#")) {
                if (text == "#Date:") {
                    auto rest_opt
                        = sbr.to_string_fragment().consume_n(text.length());

                    if (rest_opt) {
                        auto rest = rest_opt.value().trim();
                        date_time_scanner dts;
                        exttm base_tm;
                        timeval base_tv;

                        if (dts.scan(rest.data(),
                                     rest.length(),
                                     nullptr,
                                     &base_tm,
                                     base_tv))
                        {
                            this->lf_date_time.set_base_time(base_tv.tv_sec,
                                                             base_tm.et_tm);
                            this->wlf_time_scanner.set_base_time(
                                base_tv.tv_sec, base_tm.et_tm);
                        }
                    }
                }
                dst.emplace_back(li.li_file_range.fr_offset,
                                 std::chrono::microseconds{0},
                                 LEVEL_IGNORE,
                                 0);
                return scan_match{2000};
            }

            text = text.trim("\" \t");

            const bool is_date_col = F_DATE == def.fd_name
                || F_DATE_LOCAL == def.fd_name || F_DATE_UTC == def.fd_name;

            if (is_date_col) {
                if (this->lf_date_time.scan(
                        text.data(), text.length(), nullptr, &date_tm, date_tv))
                {
                    this->lf_timestamp_flags |= date_tm.et_flags;
                    found_date = true;
                }
            } else if (F_TIME == def.fd_name || F_TIME_LOCAL == def.fd_name
                       || F_TIME_UTC == def.fd_name)
            {
                if (this->wlf_time_scanner.scan(
                        text.data(), text.length(), nullptr, &time_tm, time_tv))
                {
                    this->lf_timestamp_flags |= time_tm.et_flags;
                    found_time = true;
                }
            } else if (F_STATUS_CODE == def.fd_name) {
                if (!text.empty() && text[0] >= '4') {
                    level = LEVEL_ERROR;
                }
            }

            if (def.fd_numeric_index) {
                const auto kind = def.fd_meta.lvm_kind;

                if (kind == value_kind_t::VALUE_INTEGER
                    || kind == value_kind_t::VALUE_FLOAT)
                {
                    auto number
                        = scn::scan_value<double>(text.to_string_view());

                    if (number) {
                        this->lf_value_stats[def.fd_numeric_index.value()]
                            .add_value(number->value());
                    }
                }
            }
        }

        if (!found_time) {
            return scan_no_match{"no header found"};
        }

        // Start from the parsed time and overlay the calendar date, if any.
        auto merged_tm = time_tm;

        if (found_date) {
            merged_tm.et_tm.tm_year = date_tm.et_tm.tm_year;
            merged_tm.et_tm.tm_mday = date_tm.et_tm.tm_mday;
            merged_tm.et_tm.tm_mon = date_tm.et_tm.tm_mon;
            merged_tm.et_tm.tm_wday = date_tm.et_tm.tm_wday;
            merged_tm.et_tm.tm_yday = date_tm.et_tm.tm_yday;
        }

        auto line_tv = merged_tm.to_timeval();

        if (!this->lf_specialized) {
            // Not yet specialized: mark everything collected so far as
            // ignored.
            for (auto& prev_line : dst) {
                prev_line.set_ignore(true);
            }
        }
        dst.emplace_back(li.li_file_range.fr_offset, line_tv, level, 0);

        return scan_match{2000};
    }
1536

1537
    scan_result_t scan(logfile& lf,
10,807✔
1538
                       std::vector<logline>& dst,
1539
                       const line_info& li,
1540
                       shared_buffer_ref& sbr,
1541
                       scan_batch_context& sbc) override
1542
    {
1543
        static const auto* W3C_LOG_NAME = intern_string::lookup("w3c_log");
11,995✔
1544
        static const auto* X_FIELDS_NAME = intern_string::lookup("x_fields");
11,995✔
1545
        static const auto& KNOWN_FIELDS = get_known_fields();
10,807✔
1546
        static const auto& KNOWN_STRUCT_FIELDS = get_known_struct_fields();
10,807✔
1547
        static auto X_FIELDS_IDX = 0;
1548

1549
        if (li.li_partial) {
10,807✔
1550
            return scan_incomplete{};
18✔
1551
        }
1552

1553
        if (dst.empty()) {
10,789✔
1554
            auto file_options = lf.get_file_options();
1,109✔
1555

1556
            if (file_options) {
1,109✔
1557
                this->lf_date_time.dts_default_zone
1558
                    = file_options->second.fo_default_zone.pp_value;
57✔
1559
            } else {
1560
                this->lf_date_time.dts_default_zone = nullptr;
1,052✔
1561
            }
1562
        }
1,109✔
1563

1564
        if (!this->wlf_format_name.empty()) {
10,789✔
1565
            return this->scan_int(dst, li, sbr);
296✔
1566
        }
1567

1568
        if (dst.empty() || dst.size() > 20 || sbr.empty()
19,877✔
1569
            || sbr.get_data()[0] == '#')
19,877✔
1570
        {
1571
            return scan_no_match{"no header found"};
7,814✔
1572
        }
1573

1574
        this->clear();
2,679✔
1575

1576
        for (auto line_iter = dst.begin(); line_iter != dst.end(); ++line_iter)
21,817✔
1577
        {
1578
            auto next_read_result = lf.read_line(line_iter);
19,138✔
1579

1580
            if (next_read_result.isErr()) {
19,138✔
1581
                return scan_no_match{"unable to read first line"};
×
1582
            }
1583

1584
            auto line = next_read_result.unwrap();
19,138✔
1585
            ws_separated_string ss(line.get_data(), line.length());
19,138✔
1586
            auto iter = ss.begin();
19,138✔
1587
            const auto directive = *iter;
19,138✔
1588

1589
            if (directive.empty() || directive[0] != '#') {
19,138✔
1590
                continue;
16,569✔
1591
            }
1592

1593
            ++iter;
2,569✔
1594
            if (iter == ss.end()) {
2,569✔
1595
                continue;
41✔
1596
            }
1597

1598
            if (directive == "#Date:") {
2,528✔
1599
                date_time_scanner dts;
8✔
1600
                struct exttm tm;
8✔
1601
                struct timeval tv;
1602

1603
                if (dts.scan(line.get_data_at(directive.length() + 1),
8✔
1604
                             line.length() - directive.length() - 1,
8✔
1605
                             nullptr,
1606
                             &tm,
1607
                             tv))
1608
                {
1609
                    this->lf_date_time.set_base_time(tv.tv_sec, tm.et_tm);
7✔
1610
                    this->wlf_time_scanner.set_base_time(tv.tv_sec, tm.et_tm);
7✔
1611
                }
1612
            } else if (directive == "#Fields:" && this->wlf_field_defs.empty())
2,520✔
1613
            {
1614
                int numeric_count = 0;
15✔
1615

1616
                do {
1617
                    auto sf = (*iter).trim(")");
142✔
1618

1619
                    auto field_iter = std::find_if(
426✔
1620
                        begin(KNOWN_FIELDS),
1621
                        end(KNOWN_FIELDS),
1622
                        [&sf](auto elem) { return sf == elem.fd_name; });
1,676✔
1623
                    if (field_iter != end(KNOWN_FIELDS)) {
284✔
1624
                        this->wlf_field_defs.emplace_back(*field_iter);
69✔
1625
                        auto& fd = this->wlf_field_defs.back();
69✔
1626
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
69✔
1627
                        if (common_iter == FIELD_META.end()) {
69✔
1628
                            auto emp_res = FIELD_META.emplace(
68✔
1629
                                fd.fd_meta.lvm_name, fd.fd_meta);
68✔
1630
                            common_iter = emp_res.first;
68✔
1631
                        }
1632
                        fd.fd_root_meta = &common_iter->second;
69✔
1633
                    } else if (sf.is_one_of("date", "time")) {
73✔
1634
                        this->wlf_field_defs.emplace_back(
28✔
1635
                            intern_string::lookup(sf));
14✔
1636
                        auto& fd = this->wlf_field_defs.back();
14✔
1637
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
14✔
1638
                        if (common_iter == FIELD_META.end()) {
14✔
1639
                            auto emp_res = FIELD_META.emplace(
13✔
1640
                                fd.fd_meta.lvm_name, fd.fd_meta);
13✔
1641
                            common_iter = emp_res.first;
13✔
1642
                        }
1643
                        fd.fd_root_meta = &common_iter->second;
14✔
1644
                    } else {
1645
                        const auto fs_iter = std::find_if(
177✔
1646
                            begin(KNOWN_STRUCT_FIELDS),
1647
                            end(KNOWN_STRUCT_FIELDS),
1648
                            [&sf](auto elem) {
197✔
1649
                                return sf.startswith(elem.fs_prefix);
197✔
1650
                            });
1651
                        if (fs_iter != end(KNOWN_STRUCT_FIELDS)) {
118✔
1652
                            const intern_string_t field_name
1653
                                = intern_string::lookup(sf.substr(3));
13✔
1654
                            this->wlf_field_defs.emplace_back(
13✔
1655
                                field_name,
1656
                                logline_value_meta(
26✔
1657
                                    field_name,
1658
                                    value_kind_t::VALUE_TEXT,
1659
                                    logline_value_meta::table_column{
×
1660
                                        KNOWN_FIELDS.size() + 1
13✔
1661
                                        + std::distance(
39✔
1662
                                            begin(KNOWN_STRUCT_FIELDS),
1663
                                            fs_iter)},
1664
                                    this)
26✔
1665
                                    .with_struct_name(fs_iter->fs_struct_name));
1666
                        } else {
1667
                            const intern_string_t field_name
1668
                                = intern_string::lookup(sf);
46✔
1669
                            this->wlf_field_defs.emplace_back(
46✔
1670
                                field_name,
1671
                                logline_value_meta(
92✔
1672
                                    field_name,
1673
                                    value_kind_t::VALUE_TEXT,
1674
                                    logline_value_meta::table_column{
×
1675
                                        KNOWN_FIELDS.size() + X_FIELDS_IDX},
92✔
1676
                                    this)
92✔
1677
                                    .with_struct_name(X_FIELDS_NAME));
1678
                        }
1679
                    }
1680
                    auto& fd = this->wlf_field_defs.back();
142✔
1681
                    fd.fd_meta.lvm_format = std::make_optional(this);
142✔
1682
                    switch (fd.fd_meta.lvm_kind) {
142✔
1683
                        case value_kind_t::VALUE_FLOAT:
27✔
1684
                        case value_kind_t::VALUE_INTEGER:
1685
                            fd.with_numeric_index(numeric_count);
27✔
1686
                            numeric_count += 1;
27✔
1687
                            break;
27✔
1688
                        default:
115✔
1689
                            break;
115✔
1690
                    }
1691

1692
                    ++iter;
142✔
1693
                } while (iter != ss.end());
142✔
1694

1695
                this->wlf_format_name = W3C_LOG_NAME;
15✔
1696
                this->lf_value_stats.resize(numeric_count);
15✔
1697
            }
1698
        }
35,748✔
1699

1700
        if (!this->wlf_format_name.empty() && !this->wlf_field_defs.empty()) {
2,679✔
1701
            return this->scan_int(dst, li, sbr);
15✔
1702
        }
1703

1704
        this->wlf_format_name.clear();
2,664✔
1705
        this->lf_value_stats.clear();
2,664✔
1706

1707
        return scan_no_match{"no header found"};
2,664✔
1708
    }
1709

1710
    void annotate(logfile* lf,
237✔
1711
                  uint64_t line_number,
1712
                  string_attrs_t& sa,
1713
                  logline_value_vector& values,
1714
                  bool annotate_module) const override
1715
    {
1716
        auto& sbr = values.lvv_sbr;
237✔
1717
        ws_separated_string ss(sbr.get_data(), sbr.length());
237✔
1718
        std::optional<line_range> date_lr;
237✔
1719
        std::optional<line_range> time_lr;
237✔
1720

1721
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
4,062✔
1722
            auto sf = *iter;
3,825✔
1723

1724
            if (iter.index() >= this->wlf_field_defs.size()) {
3,825✔
1725
                sa.emplace_back(line_range{sf.sf_begin, -1},
×
1726
                                SA_INVALID.value("extra fields detected"));
×
1727
                return;
×
1728
            }
1729

1730
            const auto& fd = this->wlf_field_defs[iter.index()];
3,825✔
1731

1732
            if (sf == "-") {
3,825✔
1733
                sf.invalidate();
659✔
1734
            }
1735

1736
            auto lr = line_range(sf.sf_begin, sf.sf_end);
3,825✔
1737

1738
            if (lr.is_valid()) {
3,825✔
1739
                if (fd.fd_meta.lvm_name == F_DATE) {
3,166✔
1740
                    date_lr = lr;
215✔
1741
                } else if (fd.fd_meta.lvm_name == F_TIME) {
2,951✔
1742
                    time_lr = lr;
229✔
1743
                }
1744
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
3,166✔
1745
                if (sf.startswith("\"")) {
3,166✔
1746
                    auto& meta = values.lvv_values.back().lv_meta;
28✔
1747

1748
                    if (meta.lvm_kind == value_kind_t::VALUE_TEXT) {
28✔
1749
                        meta.lvm_kind = value_kind_t::VALUE_W3C_QUOTED;
26✔
1750
                    } else {
1751
                        meta.lvm_kind = value_kind_t::VALUE_NULL;
2✔
1752
                    }
1753
                }
1754
            } else {
1755
                values.lvv_values.emplace_back(fd.fd_meta);
659✔
1756
            }
1757
            if (fd.fd_root_meta != nullptr) {
3,825✔
1758
                values.lvv_values.back().lv_meta.lvm_user_hidden
3,128✔
1759
                    = fd.fd_root_meta->lvm_user_hidden;
3,128✔
1760
            }
1761
        }
1762
        if (time_lr) {
237✔
1763
            auto ts_lr = time_lr.value();
229✔
1764
            if (date_lr) {
229✔
1765
                if (date_lr->lr_end + 1 == time_lr->lr_start) {
214✔
1766
                    ts_lr.lr_start = date_lr->lr_start;
213✔
1767
                    ts_lr.lr_end = time_lr->lr_end;
213✔
1768
                }
1769
            }
1770

1771
            sa.emplace_back(ts_lr, L_TIMESTAMP.value());
229✔
1772
        }
1773
        log_format::annotate(lf, line_number, sa, values, annotate_module);
237✔
1774
    }
1775

1776
    /**
     * Look up the statistics collected for a field.
     *
     * @param name The name of the field.
     * @return The lf_value_stats entry for the first field definition with
     * the given name, or nullptr if there is no such field or it does not
     * have a numeric index.
     */
    const logline_value_stats* stats_for_value(
        const intern_string_t& name) const override
    {
        const auto def_iter = std::find_if(
            this->wlf_field_defs.begin(),
            this->wlf_field_defs.end(),
            [&name](const auto& def) { return def.fd_meta.lvm_name == name; });

        if (def_iter == this->wlf_field_defs.end()
            || !def_iter->fd_numeric_index)
        {
            return nullptr;
        }

        return &this->lf_value_stats[def_iter->fd_numeric_index.value()];
    }
1794

1795
    bool hide_field(const intern_string_t field_name, bool val) override
×
1796
    {
1797
        if (field_name == LOG_TIME_STR) {
×
1798
            auto date_iter = FIELD_META.find(F_DATE);
×
1799
            auto time_iter = FIELD_META.find(F_TIME);
×
1800
            if (date_iter == FIELD_META.end() || time_iter == FIELD_META.end())
×
1801
            {
1802
                return false;
×
1803
            }
1804
            date_iter->second.lvm_user_hidden = val;
×
1805
            time_iter->second.lvm_user_hidden = val;
×
1806
            return true;
×
1807
        }
1808

1809
        auto fd_iter = FIELD_META.find(field_name);
×
1810
        if (fd_iter == FIELD_META.end()) {
×
1811
            return false;
×
1812
        }
1813

1814
        fd_iter->second.lvm_user_hidden = val;
×
1815

1816
        return true;
×
1817
    }
1818

1819
    std::map<intern_string_t, logline_value_meta> get_field_states() override
125✔
1820
    {
1821
        std::map<intern_string_t, logline_value_meta> retval;
125✔
1822

1823
        for (const auto& fd : FIELD_META) {
125✔
1824
            retval.emplace(fd.first, fd.second);
×
1825
        }
1826

1827
        return retval;
125✔
1828
    }
×
1829

1830
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
10✔
1831
    {
1832
        auto retval = std::make_shared<w3c_log_format>(*this);
10✔
1833

1834
        retval->lf_specialized = true;
10✔
1835
        return retval;
20✔
1836
    }
10✔
1837

1838
    class w3c_log_table : public log_format_vtab_impl {
1839
    public:
1840
        explicit w3c_log_table(const w3c_log_format& format)
7✔
1841
            : log_format_vtab_impl(format), wlt_format(format)
7✔
1842
        {
1843
        }
7✔
1844

1845
        void get_columns(std::vector<vtab_column>& cols) const override
10✔
1846
        {
1847
            for (const auto& fd : get_known_fields()) {
170✔
1848
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
160✔
1849
                    fd.fd_meta.lvm_kind);
160✔
1850

1851
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
160✔
1852
                                  type_pair.first,
1853
                                  fd.fd_collator,
160✔
1854
                                  false,
320✔
1855
                                  "",
1856
                                  type_pair.second);
1857
            }
1858
            cols.emplace_back("x_fields");
10✔
1859
            cols.back().with_comment(
20✔
1860
                "A JSON-object that contains fields that are not first-class "
1861
                "columns");
1862
            for (const auto& fs : get_known_struct_fields()) {
50✔
1863
                cols.emplace_back(fs.fs_struct_name.to_string());
40✔
1864
            }
1865
        };
10✔
1866

1867
        void get_foreign_keys(
3✔
1868
            std::unordered_set<std::string>& keys_inout) const override
1869
        {
1870
            this->log_vtab_impl::get_foreign_keys(keys_inout);
3✔
1871

1872
            for (const auto& fd : get_known_fields()) {
51✔
1873
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
48✔
1874
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
30✔
1875
                }
1876
            }
1877
        }
3✔
1878

1879
        const w3c_log_format& wlt_format;
1880
    };
1881

1882
    /**
     * @return The function-local static map of tables, keyed by format name.
     */
    static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>&
    get_tables()
    {
        static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>
            TABLES;

        return TABLES;
    }
1889

1890
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
614✔
1891
    {
1892
        if (this->wlf_format_name.empty()) {
614✔
1893
            return nullptr;
607✔
1894
        }
1895

1896
        std::shared_ptr<w3c_log_table> retval = nullptr;
7✔
1897

1898
        auto& tables = get_tables();
7✔
1899
        const auto iter = tables.find(this->wlf_format_name);
7✔
1900
        if (iter == tables.end()) {
7✔
1901
            retval = std::make_shared<w3c_log_table>(*this);
7✔
1902
            tables[this->wlf_format_name] = retval;
7✔
1903
        }
1904

1905
        return retval;
7✔
1906
    }
7✔
1907

1908
    /**
     * No-op override: the body is empty, so `sbr` is left untouched and
     * `ll` and `opts` are not read.
     */
    void get_subline(const logline& ll,
344✔
1909
                     shared_buffer_ref& sbr,
1910
                     subline_options opts) override
1911
    {
1912
    }
344✔
1913

1914
    date_time_scanner wlf_time_scanner;
1915
    intern_string_t wlf_format_name;
1916
    std::vector<field_def> wlf_field_defs;
1917
};
1918

1919
// Definition of the static map of shared per-field metadata, keyed by field
// name.  Entries are added by scan() while processing a "#Fields:" directive
// and their lvm_user_hidden flag is changed by hide_field().
std::unordered_map<const intern_string_t, logline_value_meta>
1920
    w3c_log_format::FIELD_META;
1921

1922
// Interned names of the "date" and "time" fields.
const intern_string_t w3c_log_format::F_DATE = intern_string::lookup("date");
1923
const intern_string_t w3c_log_format::F_TIME = intern_string::lookup("time");
1924

1925
struct logfmt_pair_handler {
1926
    explicit logfmt_pair_handler(date_time_scanner& dts) : lph_dt_scanner(dts)
10,803✔
1927
    {
1928
    }
10,803✔
1929

1930
    log_format::scan_result_t process_value(const string_fragment& value_frag)
4,016✔
1931
    {
1932
        if (this->lph_key_frag.is_one_of(
4,016✔
1933
                "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
1934
        {
1935
            if (!this->lph_dt_scanner.scan(value_frag.data(),
31✔
1936
                                           value_frag.length(),
31✔
1937
                                           nullptr,
1938
                                           &this->lph_time_tm,
1939
                                           this->lph_tv))
31✔
1940
            {
1941
                return log_format::scan_no_match{
×
1942
                    "timestamp value did not parse correctly"};
×
1943
            }
1944
            char buf[1024];
1945
            this->lph_dt_scanner.ftime(
31✔
1946
                buf, sizeof(buf), nullptr, this->lph_time_tm);
31✔
1947
            this->lph_found_time = true;
31✔
1948
        } else if (this->lph_key_frag.is_one_of("level"_frag, "lvl"_frag)) {
3,985✔
1949
            this->lph_level
1950
                = string2level(value_frag.data(), value_frag.length());
40✔
1951
        }
1952
        return log_format::scan_match{};
4,016✔
1953
    }
1954

1955
    date_time_scanner& lph_dt_scanner;
1956
    bool lph_found_time{false};
1957
    exttm lph_time_tm;
1958
    timeval lph_tv{0, 0};
1959
    log_level_t lph_level{log_level_t::LEVEL_INFO};
1960
    string_fragment lph_key_frag{""};
1961
};
1962

1963
class logfmt_format : public log_format {
1964
public:
1965
    const intern_string_t get_name() const override
13,409✔
1966
    {
1967
        const static intern_string_t NAME = intern_string::lookup("logfmt_log");
14,851✔
1968

1969
        return NAME;
13,409✔
1970
    }
1971

1972
    class logfmt_log_table : public log_format_vtab_impl {
1973
    public:
1974
        logfmt_log_table(const log_format& format)
607✔
1975
            : log_format_vtab_impl(format)
607✔
1976
        {
1977
        }
607✔
1978

1979
        void get_columns(std::vector<vtab_column>& cols) const override
608✔
1980
        {
1981
            static const auto FIELDS = std::string("fields");
1,822✔
1982

1983
            cols.emplace_back(FIELDS);
608✔
1984
        }
608✔
1985
    };
1986

1987
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
607✔
1988
    {
1989
        static auto retval = std::make_shared<logfmt_log_table>(*this);
607✔
1990

1991
        return retval;
607✔
1992
    }
1993

1994
    scan_result_t scan(logfile& lf,
10,803✔
1995
                       std::vector<logline>& dst,
1996
                       const line_info& li,
1997
                       shared_buffer_ref& sbr,
1998
                       scan_batch_context& sbc) override
1999
    {
2000
        auto p = logfmt::parser(sbr.to_string_fragment());
10,803✔
2001
        scan_result_t retval = scan_no_match{};
10,803✔
2002
        bool done = false;
10,803✔
2003
        logfmt_pair_handler lph(this->lf_date_time);
10,803✔
2004

2005
        if (dst.empty()) {
10,803✔
2006
            auto file_options = lf.get_file_options();
1,121✔
2007

2008
            if (file_options) {
1,121✔
2009
                this->lf_date_time.dts_default_zone
2010
                    = file_options->second.fo_default_zone.pp_value;
57✔
2011
            } else {
2012
                this->lf_date_time.dts_default_zone = nullptr;
1,064✔
2013
            }
2014
        }
1,121✔
2015

2016
        while (!done) {
25,622✔
2017
            auto parse_result = p.step();
14,819✔
2018

2019
            auto value_res = parse_result.match(
2020
                [&done](const logfmt::parser::end_of_input&) -> scan_result_t {
×
2021
                    done = true;
232✔
2022
                    return scan_match{};
232✔
2023
                },
2024
                [&lph](const logfmt::parser::kvpair& kvp) -> scan_result_t {
×
2025
                    lph.lph_key_frag = kvp.first;
4,016✔
2026

2027
                    return kvp.second.match(
2028
                        [](const logfmt::parser::bool_value& bv)
×
2029
                            -> scan_result_t { return scan_match{}; },
×
2030
                        [&lph](const logfmt::parser::float_value& fv)
×
2031
                            -> scan_result_t {
2032
                            return lph.process_value(fv.fv_str_value);
5✔
2033
                        },
2034
                        [&lph](const logfmt::parser::int_value& iv)
×
2035
                            -> scan_result_t {
2036
                            return lph.process_value(iv.iv_str_value);
108✔
2037
                        },
2038
                        [&lph](const logfmt::parser::quoted_value& qv)
×
2039
                            -> scan_result_t {
2040
                            auto_mem<yajl_handle_t> handle(yajl_free);
343✔
2041
                            yajl_callbacks cb;
2042
                            scan_result_t retval;
343✔
2043

2044
                            memset(&cb, 0, sizeof(cb));
343✔
2045
                            handle = yajl_alloc(&cb, nullptr, &lph);
343✔
2046
                            cb.yajl_string = +[](void* ctx,
686✔
2047
                                                 const unsigned char* str,
2048
                                                 size_t len,
2049
                                                 yajl_string_props_t*) -> int {
2050
                                auto& lph = *((logfmt_pair_handler*) ctx);
343✔
2051
                                string_fragment value_frag{str, 0, (int) len};
343✔
2052

2053
                                auto value_res = lph.process_value(value_frag);
343✔
2054
                                return value_res.is<scan_match>();
686✔
2055
                            };
686✔
2056

2057
                            if (yajl_parse(
343✔
2058
                                    handle,
2059
                                    (const unsigned char*) qv.qv_value.data(),
343✔
2060
                                    qv.qv_value.length())
343✔
2061
                                    != yajl_status_ok
2062
                                || yajl_complete_parse(handle)
343✔
2063
                                    != yajl_status_ok)
2064
                            {
2065
                                log_debug("json parsing failed");
×
2066
                                string_fragment unq_frag{
2067
                                    qv.qv_value.sf_string,
×
2068
                                    qv.qv_value.sf_begin + 1,
×
2069
                                    qv.qv_value.sf_end - 1,
×
2070
                                };
2071

2072
                                return lph.process_value(unq_frag);
×
2073
                            }
2074

2075
                            return scan_match{};
343✔
2076
                        },
343✔
2077
                        [&lph](const logfmt::parser::unquoted_value& uv)
4,016✔
2078
                            -> scan_result_t {
2079
                            return lph.process_value(uv.uv_value);
3,560✔
2080
                        });
8,032✔
2081
                },
2082
                [](const logfmt::parser::error& err) -> scan_result_t {
×
2083
                    // log_error("logfmt parse error: %s", err.e_msg.c_str());
2084
                    return scan_no_match{};
10,571✔
2085
                });
14,819✔
2086
            if (value_res.is<scan_no_match>()) {
14,819✔
2087
                retval = value_res;
10,571✔
2088
                done = true;
10,571✔
2089
            }
2090
        }
14,819✔
2091

2092
        if (lph.lph_found_time) {
10,803✔
2093
            this->lf_timestamp_flags = lph.lph_time_tm.et_flags;
31✔
2094
            dst.emplace_back(
31✔
2095
                li.li_file_range.fr_offset, lph.lph_tv, lph.lph_level);
31✔
2096
            retval = scan_match{2000};
31✔
2097
        }
2098

2099
        return retval;
21,606✔
2100
    }
×
2101

2102
    void annotate(logfile* lf,
11✔
2103
                  uint64_t line_number,
2104
                  string_attrs_t& sa,
2105
                  logline_value_vector& values,
2106
                  bool annotate_module) const override
2107
    {
2108
        static const intern_string_t FIELDS_NAME
2109
            = intern_string::lookup("fields");
15✔
2110

2111
        auto& sbr = values.lvv_sbr;
11✔
2112
        auto p = logfmt::parser(sbr.to_string_fragment());
11✔
2113
        auto done = false;
11✔
2114
        auto found_body = false;
11✔
2115

2116
        while (!done) {
95✔
2117
            auto parse_result = p.step();
84✔
2118

2119
            done = parse_result.match(
168✔
2120
                [](const logfmt::parser::end_of_input&) { return true; },
11✔
2121
                [this, &sa, &values, &found_body](
×
2122
                    const logfmt::parser::kvpair& kvp) {
2123
                    auto value_frag = kvp.second.match(
73✔
2124
                        [this, &kvp, &values](
×
2125
                            const logfmt::parser::bool_value& bv) {
2126
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2127
                                                              kvp.first),
×
2128
                                                          value_kind_t::
2129
                                                              VALUE_INTEGER,
2130
                                                          logline_value_meta::
2131
                                                              table_column{0},
×
2132
                                                          (log_format*) this}
×
2133
                                           .with_struct_name(FIELDS_NAME);
×
2134
                            values.lvv_values.emplace_back(lvm, bv.bv_value);
×
2135

2136
                            return bv.bv_str_value;
×
2137
                        },
×
2138
                        [this, &kvp, &values](
×
2139
                            const logfmt::parser::int_value& iv) {
2140
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2141
                                                              kvp.first),
×
2142
                                                          value_kind_t::
2143
                                                              VALUE_INTEGER,
2144
                                                          logline_value_meta::
2145
                                                              table_column{0},
×
2146
                                                          (log_format*) this}
×
2147
                                           .with_struct_name(FIELDS_NAME);
×
2148
                            values.lvv_values.emplace_back(lvm, iv.iv_value);
×
2149

2150
                            return iv.iv_str_value;
×
2151
                        },
×
2152
                        [this, &kvp, &values](
73✔
2153
                            const logfmt::parser::float_value& fv) {
2154
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2155
                                                              kvp.first),
×
2156
                                                          value_kind_t::
2157
                                                              VALUE_INTEGER,
2158
                                                          logline_value_meta::
2159
                                                              table_column{0},
×
2160
                                                          (log_format*) this}
×
2161
                                           .with_struct_name(FIELDS_NAME);
×
2162
                            values.lvv_values.emplace_back(lvm, fv.fv_value);
×
2163

2164
                            return fv.fv_str_value;
×
2165
                        },
×
2166
                        [](const logfmt::parser::quoted_value& qv) {
×
2167
                            return qv.qv_value;
24✔
2168
                        },
2169
                        [](const logfmt::parser::unquoted_value& uv) {
×
2170
                            return uv.uv_value;
49✔
2171
                        });
2172
                    auto value_lr
2173
                        = line_range{value_frag.sf_begin, value_frag.sf_end};
73✔
2174

2175
                    auto known_field = false;
73✔
2176
                    if (kvp.first.is_one_of(
73✔
2177
                            "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
2178
                    {
2179
                        sa.emplace_back(value_lr, L_TIMESTAMP.value());
11✔
2180
                        known_field = true;
11✔
2181
                    } else if (kvp.first.is_one_of("level"_frag, "lvl"_frag)) {
62✔
2182
                        sa.emplace_back(value_lr, L_LEVEL.value());
11✔
2183
                        known_field = true;
11✔
2184
                    } else if (kvp.first.is_one_of("msg"_frag, "message"_frag))
51✔
2185
                    {
2186
                        sa.emplace_back(value_lr, SA_BODY.value());
11✔
2187
                        found_body = true;
11✔
2188
                    } else if (kvp.second.is<logfmt::parser::quoted_value>()
40✔
2189
                               || kvp.second
78✔
2190
                                      .is<logfmt::parser::unquoted_value>())
38✔
2191
                    {
2192
                        auto lvm
2193
                            = logline_value_meta{intern_string::lookup(
160✔
2194
                                                     kvp.first),
40✔
2195
                                                 value_frag.startswith("\"")
40✔
2196
                                                     ? value_kind_t::VALUE_JSON
2197
                                                     : value_kind_t::VALUE_TEXT,
2198
                                                 logline_value_meta::
2199
                                                     table_column{0},
40✔
2200
                                                 (log_format*) this}
80✔
2201
                                  .with_struct_name(FIELDS_NAME);
40✔
2202
                        values.lvv_values.emplace_back(lvm, value_frag);
40✔
2203
                    }
40✔
2204
                    if (known_field) {
73✔
2205
                        auto key_with_eq = kvp.first;
22✔
2206
                        key_with_eq.sf_end += 1;
22✔
2207
                        sa.emplace_back(to_line_range(key_with_eq),
22✔
2208
                                        SA_REPLACED.value());
44✔
2209
                    } else {
2210
                        sa.emplace_back(to_line_range(kvp.first),
51✔
2211
                                        VC_ROLE.value(role_t::VCR_OBJECT_KEY));
102✔
2212
                    }
2213
                    return false;
73✔
2214
                },
2215
                [line_number, &sbr](const logfmt::parser::error& err) {
84✔
2216
                    log_error("bad line %.*s", sbr.length(), sbr.get_data());
×
2217
                    log_error("%lld:logfmt parse error: %s",
×
2218
                              line_number,
2219
                              err.e_msg.c_str());
2220
                    return true;
×
2221
                });
2222
        }
84✔
2223

2224
        if (!found_body) {
11✔
2225
            sa.emplace_back(line_range::empty_at(sbr.length()),
×
2226
                            SA_BODY.value());
×
2227
        }
2228

2229
        log_format::annotate(lf, line_number, sa, values, annotate_module);
11✔
2230
    }
11✔
2231

2232
    std::shared_ptr<log_format> specialized(int fmt_lock) override
5✔
2233
    {
2234
        auto retval = std::make_shared<logfmt_format>(*this);
5✔
2235

2236
        retval->lf_specialized = true;
5✔
2237
        return retval;
10✔
2238
    }
5✔
2239
};
2240

2241
// Binds the log_format implementations through the injector.  The add<>()
// calls run in the order written: logfmt, bro, w3c, generic, then piper.
// NOTE(review): presumably this order is the order in which the formats are
// tried -- confirm against the injector/format-loading code.
static auto format_binder = injector::bind_multiple<log_format>()
2242
                                .add<logfmt_format>()
2243
                                .add<bro_log_format>()
2244
                                .add<w3c_log_format>()
2245
                                .add<generic_log_format>()
2246
                                .add<piper_log_format>();
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc