• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 21043099841-2763

15 Jan 2026 06:11AM UTC coverage: 68.949% (+0.02%) from 68.934%
21043099841-2763

push

github

tstack
[logfmt] parse spans that are not kv-pairs

84 of 94 new or added lines in 5 files covered. (89.36%)

2 existing lines in 1 file now uncovered.

51802 of 75131 relevant lines covered (68.95%)

434450.69 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.23
/src/log_format_impls.cc
1
/**
2
 * Copyright (c) 2007-2017, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file log_format_impls.cc
30
 */
31

32
#include <algorithm>
33
#include <chrono>
34
#include <memory>
35
#include <utility>
36

37
#include "log_format.hh"
38

39
#include <stdio.h>
40

41
#include "base/injector.bind.hh"
42
#include "base/opt_util.hh"
43
#include "base/string_attr_type.hh"
44
#include "config.h"
45
#include "formats/logfmt/logfmt.parser.hh"
46
#include "log_vtab_impl.hh"
47
#include "ptimec.hh"
48
#include "scn/scan.h"
49
#include "sql_util.hh"
50
#include "yajlpp/yajlpp.hh"
51

52
class piper_log_format : public log_format {
53
public:
54
    const intern_string_t get_name() const override
15,148✔
55
    {
56
        static const intern_string_t RETVAL
57
            = intern_string::lookup("lnav_piper_log");
16,650✔
58

59
        return RETVAL;
15,148✔
60
    }
61

62
    scan_result_t scan(logfile& lf,
12,371✔
63
                       std::vector<logline>& dst,
64
                       const line_info& li,
65
                       shared_buffer_ref& sbr,
66
                       scan_batch_context& sbc) override
67
    {
68
        if (lf.has_line_metadata()
12,371✔
69
            && lf.get_text_format() == text_format_t::TF_LOG)
12,371✔
70
        {
71
            dst.emplace_back(li.li_file_range.fr_offset,
291✔
72
                             to_us(li.li_timestamp),
582✔
73
                             li.li_level);
291✔
74
            return scan_match{1};
291✔
75
        }
76

77
        return scan_no_match{"not a piper capture"};
12,080✔
78
    }
79

80
    static constexpr int TIMESTAMP_SIZE = 28;
81

82
    void annotate(logfile* lf,
41✔
83
                  uint64_t line_number,
84
                  string_attrs_t& sa,
85
                  logline_value_vector& values) const override
86
    {
87
        auto lr = line_range{0, TIMESTAMP_SIZE};
41✔
88
        sa.emplace_back(lr, L_TIMESTAMP.value());
41✔
89
        log_format::annotate(lf, line_number, sa, values);
41✔
90
    }
41✔
91

92
    void get_subline(const log_format_file_state& lffs,
292✔
93
                     const logline& ll,
94
                     shared_buffer_ref& sbr,
95
                     subline_options opts) override
96
    {
97
        this->plf_cached_line.resize(TIMESTAMP_SIZE);
292✔
98
        auto tlen = sql_strftime(this->plf_cached_line.data(),
292✔
99
                                 this->plf_cached_line.size(),
100
                                 ll.get_timeval(),
292✔
101
                                 'T');
102
        this->plf_cached_line.resize(tlen);
292✔
103
        {
104
            char zone_str[16];
105
            exttm tmptm;
292✔
106

107
            tmptm.et_flags |= ETF_ZONE_SET;
292✔
108
            tmptm.et_gmtoff
109
                = lnav::local_time_to_info(
584✔
110
                      date::local_seconds{ll.get_time<std::chrono::seconds>()})
292✔
111
                      .first.offset.count();
292✔
112
            off_t zone_len = 0;
292✔
113
            ftime_z(zone_str, zone_len, sizeof(zone_str), tmptm);
292✔
114
            for (off_t lpc = 0; lpc < zone_len; lpc++) {
1,752✔
115
                this->plf_cached_line.push_back(zone_str[lpc]);
1,460✔
116
            }
117
        }
118
        this->plf_cached_line.push_back(' ');
292✔
119
        const auto prefix_len = this->plf_cached_line.size();
292✔
120
        this->plf_cached_line.resize(this->plf_cached_line.size()
584✔
121
                                     + sbr.length());
292✔
122
        memcpy(
292✔
123
            &this->plf_cached_line[prefix_len], sbr.get_data(), sbr.length());
292✔
124

125
        sbr.share(this->plf_share_manager,
584✔
126
                  this->plf_cached_line.data(),
292✔
127
                  this->plf_cached_line.size());
128
    }
292✔
129

130
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
131
    {
132
        auto retval = std::make_shared<piper_log_format>(*this);
6✔
133

134
        retval->lf_specialized = true;
6✔
135
        retval->lf_timestamp_flags |= ETF_ZONE_SET | ETF_MICROS_SET;
6✔
136
        return retval;
12✔
137
    }
6✔
138

139
private:
140
    shared_buffer plf_share_manager;
141
    std::vector<char> plf_cached_line;
142
};
143

144
class generic_log_format : public log_format {
145
public:
146
    static const pcre_format* get_pcre_log_formats()
12,425✔
147
    {
148
        static const pcre_format log_fmt[] = {
149
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24}))"),
150
            pcre_format(
151
                R"(^(?:\*\*\*\s+)?(?<timestamp>(?:\s|\d{4}[\-\/]\d{2}[\-\/]\d{2}|T|\d{1,2}:\d{2}(?::\d{2}(?:[\.,]\d{1,6})?)?|Z|[+\-]\d{2}:?\d{2}|(?!DBG|DEBUG|ERR|INFO|WARN|NONE)[A-Z]{3,4})+)[:|\s]?(trc|trace|dbg|debug|info|warn(?:ing)?|err(?:or)?)[:|\s]\s*)"),
152
            pcre_format(
153
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) \[(trace|debug|info|warn(?:ing)?|error|critical)\]\s+)"),
154
            pcre_format(
155
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) -- (trace|debug|info|warn(?:ing)?|error|critical) --\s+)"),
156

157
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+/\.-]+) \[\w\s+)"),
158
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+)\s+)"),
159
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+) -\s+)"),
160
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+) -\s+)"),
161
            pcre_format(
162
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\] \[(trace|debug|info|warn(?:ing)?|error|critical)\]\s+)"),
163
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: "
164
                        "\\.,/-]+)\\[[^\\]]+\\]\\s+"),
165
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+)\s+)"),
166

167
            pcre_format(
168
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s*(\w+):?\s+)"),
169
            pcre_format(
170
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s+)"),
171
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: "
172
                        "\\.,+/-]+)\\] \\w+\\s+"),
173
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: ,+/-]+)\\] "
174
                        "\\(\\d+\\)\\s+"),
175

176
            pcre_format(),
177
        };
12,425✔
178

179
        return log_fmt;
12,425✔
180
    }
181

182
    std::string get_pattern_regex(const pattern_locks& pl,
×
183
                                  uint64_t line_number) const override
184
    {
185
        auto pat_index = pl.pattern_index_for_line(line_number);
×
186
        return get_pcre_log_formats()[pat_index].name;
×
187
    }
188

189
    const intern_string_t get_name() const override
14,777✔
190
    {
191
        static const intern_string_t RETVAL
192
            = intern_string::lookup("generic_log");
16,279✔
193

194
        return RETVAL;
14,777✔
195
    }
196

197
    scan_result_t scan(logfile& lf,
12,334✔
198
                       std::vector<logline>& dst,
199
                       const line_info& li,
200
                       shared_buffer_ref& sbr,
201
                       scan_batch_context& sbc) override
202
    {
203
        exttm log_time;
12,334✔
204
        timeval log_tv;
205
        string_fragment ts;
12,334✔
206
        std::optional<string_fragment> level;
12,334✔
207
        const char* last_pos;
208

209
        if (dst.empty()) {
12,334✔
210
            auto file_options = lf.get_file_options();
203✔
211

212
            if (file_options) {
203✔
213
                this->lf_date_time.dts_default_zone
214
                    = file_options->second.fo_default_zone.pp_value;
2✔
215
            } else {
216
                this->lf_date_time.dts_default_zone = nullptr;
201✔
217
            }
218
        }
203✔
219

220
        if ((last_pos = this->log_scanf(sbc,
24,668✔
221
                                        dst.size(),
12,334✔
222
                                        sbr.to_string_fragment(),
223
                                        get_pcre_log_formats(),
224
                                        nullptr,
225
                                        &log_time,
226
                                        &log_tv,
227

228
                                        &ts,
229
                                        &level))
230
            != nullptr)
12,334✔
231
        {
232
            auto level_val = log_level_t::LEVEL_UNKNOWN;
2,391✔
233
            if (level) {
2,391✔
234
                level_val = string2level(level->data(), level->length());
2,391✔
235
            }
236

237
            if (!((log_time.et_flags & ETF_DAY_SET)
2,391✔
238
                  && (log_time.et_flags & ETF_MONTH_SET)
2,316✔
239
                  && (log_time.et_flags & ETF_YEAR_SET)))
2,316✔
240
            {
241
                this->check_for_new_year(dst, log_time, log_tv);
688✔
242
            }
243

244
            if (!(this->lf_timestamp_flags
4,782✔
245
                  & (ETF_MILLIS_SET | ETF_MICROS_SET | ETF_NANOS_SET))
2,391✔
246
                && !dst.empty()
2,064✔
247
                && dst.back().get_time<std::chrono::seconds>().count()
2,062✔
248
                    == log_tv.tv_sec
2,062✔
249
                && dst.back()
5,319✔
250
                        .get_subsecond_time<std::chrono::microseconds>()
3,255✔
251
                        .count()
864✔
252
                    != 0)
253
            {
254
                auto log_ms
255
                    = dst.back()
×
256
                          .get_subsecond_time<std::chrono::microseconds>();
×
257

258
                log_time.et_nsec
259
                    = std::chrono::duration_cast<std::chrono::nanoseconds>(
×
260
                          log_ms)
261
                          .count();
×
262
                log_tv.tv_usec
263
                    = std::chrono::duration_cast<std::chrono::microseconds>(
×
264
                          log_ms)
265
                          .count();
×
266
            }
267

268
            auto log_us = to_us(log_tv);
2,391✔
269
            auto tid_iter = sbc.sbc_tids.insert_tid(
2,391✔
270
                sbc.sbc_allocator, string_fragment{}, log_us);
×
271
            tid_iter->second.titr_level_stats.update_msg_count(level_val);
2,391✔
272
            dst.emplace_back(li.li_file_range.fr_offset, log_us, level_val);
2,391✔
273
            return scan_match{5};
2,391✔
274
        }
275

276
        return scan_no_match{"no patterns matched"};
9,943✔
277
    }
278

279
    void annotate(logfile* lf,
91✔
280
                  uint64_t line_number,
281
                  string_attrs_t& sa,
282
                  logline_value_vector& values) const override
283
    {
284
        thread_local auto md = lnav::pcre2pp::match_data::unitialized();
91✔
285
        auto lffs = lf->get_format_file_state();
91✔
286
        auto& line = values.lvv_sbr;
91✔
287
        int pat_index
288
            = lffs.lffs_pattern_locks.pattern_index_for_line(line_number);
91✔
289
        const auto& fmt = get_pcre_log_formats()[pat_index];
91✔
290
        int prefix_len = 0;
91✔
291
        const auto line_sf = line.to_string_fragment();
91✔
292
        auto match_res = fmt.pcre->capture_from(line_sf)
91✔
293
                             .into(md)
91✔
294
                             .matches(PCRE2_NO_UTF_CHECK)
182✔
295
                             .ignore_error();
91✔
296
        if (!match_res) {
91✔
297
            return;
11✔
298
        }
299

300
        auto ts_cap = md[fmt.pf_timestamp_index].value();
80✔
301
        auto lr = to_line_range(ts_cap.trim());
80✔
302
        sa.emplace_back(lr, L_TIMESTAMP.value());
80✔
303

304
        values.lvv_values.emplace_back(TS_META, line, lr);
80✔
305
        values.lvv_values.back().lv_meta.lvm_format = (log_format*) this;
80✔
306

307
        prefix_len = md[0]->sf_end;
80✔
308
        auto level_cap = md[2];
80✔
309
        if (level_cap) {
80✔
310
            if (string2level(level_cap->data(), level_cap->length(), true)
73✔
311
                != LEVEL_UNKNOWN)
73✔
312
            {
313
                values.lvv_values.emplace_back(
73✔
314
                    LEVEL_META, line, to_line_range(level_cap->trim()));
73✔
315
                values.lvv_values.back().lv_meta.lvm_format
73✔
316
                    = (log_format*) this;
73✔
317

318
                lr = to_line_range(level_cap->trim());
73✔
319
                if (lr.lr_end != (ssize_t) line.length()) {
73✔
320
                    sa.emplace_back(lr, L_LEVEL.value());
73✔
321
                }
322
            }
323
        }
324

325
        lr.lr_start = 0;
80✔
326
        lr.lr_end = prefix_len;
80✔
327
        sa.emplace_back(lr, L_PREFIX.value());
80✔
328

329
        lr.lr_start = prefix_len;
80✔
330
        lr.lr_end = line.length();
80✔
331
        sa.emplace_back(lr, SA_BODY.value());
80✔
332

333
        log_format::annotate(lf, line_number, sa, values);
80✔
334
    }
335

336
    std::shared_ptr<log_format> specialized(int fmt_lock) override
52✔
337
    {
338
        auto retval = std::make_shared<generic_log_format>(*this);
52✔
339

340
        retval->lf_specialized = true;
52✔
341
        return retval;
104✔
342
    }
52✔
343

344
    bool hide_field(const intern_string_t field_name, bool val) override
2✔
345
    {
346
        if (field_name == TS_META.lvm_name) {
2✔
347
            TS_META.lvm_user_hidden = val;
1✔
348
            return true;
1✔
349
        }
350
        if (field_name == LEVEL_META.lvm_name) {
1✔
351
            LEVEL_META.lvm_user_hidden = val;
1✔
352
            return true;
1✔
353
        }
354
        if (field_name == OPID_META.lvm_name) {
×
355
            OPID_META.lvm_user_hidden = val;
×
356
            return true;
×
357
        }
358
        return false;
×
359
    }
360

361
    std::map<intern_string_t, logline_value_meta> get_field_states() override
53✔
362
    {
363
        return {
364
            {TS_META.lvm_name, TS_META},
365
            {LEVEL_META.lvm_name, LEVEL_META},
366
            {OPID_META.lvm_name, OPID_META},
367
        };
265✔
368
    }
53✔
369

370
private:
371
    static logline_value_meta TS_META;
372
    static logline_value_meta LEVEL_META;
373
    static logline_value_meta OPID_META;
374
};
375

376
// Metadata for the fields exposed by generic_log_format.  These objects are
// static, so a change made through hide_field() is seen by every instance.

// The timestamp capture, exposed as text in table column 2.
logline_value_meta generic_log_format::TS_META{
    intern_string::lookup("log_time"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{2},
};

// The level capture, exposed as text in table column 3.
logline_value_meta generic_log_format::LEVEL_META{
    intern_string::lookup("log_level"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{3},
};

// The operation ID, declared as an internal column rather than a table one.
logline_value_meta generic_log_format::OPID_META{
    intern_string::lookup("log_opid"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::internal_column{},
};
393

394
/**
 * Decode the "\xNN" hexadecimal escapes found in a length-delimited buffer.
 *
 * The buffer does not need to be NUL-terminated and no byte at or beyond
 * `str + len` is ever read.  An escape is only recognized when the
 * backslash is followed by 'x' and exactly two hexadecimal digits that all
 * lie inside the buffer.
 *
 * A backslash that does not start a valid escape is dropped and the
 * characters after it are copied through unchanged.
 *
 * @param str The bytes to decode.
 * @param len The number of bytes in `str`.
 * @return The decoded string.
 */
std::string
from_escaped_string(const char* str, size_t len)
{
    // Value of a hexadecimal digit, or -1 when `ch` is not one.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    std::string retval;

    retval.reserve(len);
    for (size_t lpc = 0; lpc < len; lpc++) {
        if (str[lpc] != '\\') {
            retval.append(1, str[lpc]);
            continue;
        }

        // The backslash itself is never copied.  Only consume the rest of
        // the escape when both digits are inside the buffer and valid.
        if ((lpc + 3) < len && str[lpc + 1] == 'x') {
            const int hi = hex_value(str[lpc + 2]);
            const int lo = hex_value(str[lpc + 3]);

            if (hi >= 0 && lo >= 0) {
                retval.append(1, static_cast<char>((hi << 4) | lo));
                lpc += 3;
            }
        }
    }

    return retval;
}
×
419

420
/**
 * Find the first occurrence of the NUL-terminated string `find` within the
 * first `slen` bytes of `s`.  The search also stops at a NUL byte in `s`.
 *
 * @param s The buffer to search.
 * @param find The NUL-terminated string to look for.
 * @param slen The maximum number of bytes of `s` to examine.
 * @return A pointer to the start of the match, `s` itself when `find` is
 * empty, or std::nullopt when there is no match.
 */
std::optional<const char*>
lnav_strnstr(const char* s, const char* find, size_t slen)
{
    const char first = *find;

    if (first == '\0') {
        return s;
    }

    const char* rest = find + 1;
    const size_t rest_len = strlen(rest);

    for (size_t remaining = slen; remaining > 0 && *s != '\0';) {
        const char current = *s;

        ++s;
        --remaining;
        if (current != first) {
            continue;
        }
        // The tail of the needle cannot fit in what is left of the buffer.
        if (rest_len > remaining) {
            return std::nullopt;
        }
        if (strncmp(s, rest, rest_len) == 0) {
            return s - 1;
        }
    }

    return std::nullopt;
}
444

445
struct separated_string {
446
    const char* ss_str;
447
    size_t ss_len;
448
    const char* ss_separator;
449
    size_t ss_separator_len;
450

451
    separated_string(const char* str, size_t len)
35,383✔
452
        : ss_str(str), ss_len(len), ss_separator(","),
35,383✔
453
          ss_separator_len(strlen(this->ss_separator))
35,383✔
454
    {
455
    }
35,383✔
456

457
    separated_string& with_separator(const char* sep)
35,383✔
458
    {
459
        this->ss_separator = sep;
35,383✔
460
        this->ss_separator_len = strlen(sep);
35,383✔
461
        return *this;
35,383✔
462
    }
463

464
    struct iterator {
465
        const separated_string& i_parent;
466
        const char* i_pos;
467
        const char* i_next_pos;
468
        size_t i_index;
469

470
        iterator(const separated_string& ss, const char* pos)
861,538✔
471
            : i_parent(ss), i_pos(pos), i_next_pos(pos), i_index(0)
861,538✔
472
        {
473
            this->update();
861,538✔
474
        }
861,538✔
475

476
        void update()
1,652,478✔
477
        {
478
            const separated_string& ss = this->i_parent;
1,652,478✔
479
            auto next_field
480
                = lnav_strnstr(this->i_pos,
1,652,478✔
481
                               ss.ss_separator,
1,652,478✔
482
                               ss.ss_len - (this->i_pos - ss.ss_str));
1,652,478✔
483
            if (next_field) {
1,652,478✔
484
                this->i_next_pos = next_field.value() + ss.ss_separator_len;
755,937✔
485
            } else {
486
                this->i_next_pos = ss.ss_str + ss.ss_len;
896,541✔
487
            }
488
        }
1,652,478✔
489

490
        iterator& operator++()
790,940✔
491
        {
492
            this->i_pos = this->i_next_pos;
790,940✔
493
            this->update();
790,940✔
494
            this->i_index += 1;
790,940✔
495

496
            return *this;
790,940✔
497
        }
498

499
        string_fragment operator*()
701,338✔
500
        {
501
            const auto& ss = this->i_parent;
701,338✔
502
            int end;
503

504
            if (this->i_next_pos < (ss.ss_str + ss.ss_len)) {
701,338✔
505
                end = this->i_next_pos - ss.ss_str - ss.ss_separator_len;
671,043✔
506
            } else {
507
                end = this->i_next_pos - ss.ss_str;
30,295✔
508
            }
509
            return string_fragment::from_byte_range(
701,338✔
510
                ss.ss_str, this->i_pos - ss.ss_str, end);
701,338✔
511
        }
512

513
        bool operator==(const iterator& other) const
826,155✔
514
        {
515
            return (&this->i_parent == &other.i_parent)
826,155✔
516
                && (this->i_pos == other.i_pos);
826,155✔
517
        }
518

519
        bool operator!=(const iterator& other) const
825,987✔
520
        {
521
            return !(*this == other);
825,987✔
522
        }
523

524
        size_t index() const { return this->i_index; }
1,718,319✔
525
    };
526

527
    iterator begin() { return {*this, this->ss_str}; }
35,383✔
528

529
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
826,155✔
530
};
531

532
class bro_log_format : public log_format {
533
public:
534
    static const intern_string_t TS;
535
    static const intern_string_t DURATION;
536
    struct field_def {
537
        logline_value_meta fd_meta;
538
        logline_value_meta* fd_root_meta;
539
        std::string fd_collator;
540
        std::optional<size_t> fd_numeric_index;
541

542
        explicit field_def(const intern_string_t name,
680✔
543
                           size_t col,
544
                           log_format* format)
545
            : fd_meta(name,
1,360✔
546
                      value_kind_t::VALUE_TEXT,
547
                      logline_value_meta::table_column{col},
680✔
548
                      format),
549
              fd_root_meta(&FIELD_META.find(name)->second)
680✔
550
        {
551
        }
680✔
552

553
        field_def& with_kind(value_kind_t kind,
500✔
554
                             bool identifier = false,
555
                             bool foreign_key = false,
556
                             const std::string& collator = "")
557
        {
558
            this->fd_meta.lvm_kind = kind;
500✔
559
            this->fd_meta.lvm_identifier = identifier;
500✔
560
            this->fd_meta.lvm_foreign_key = foreign_key;
500✔
561
            this->fd_collator = collator;
500✔
562
            return *this;
500✔
563
        }
564

565
        field_def& with_numeric_index(size_t index)
126✔
566
        {
567
            this->fd_numeric_index = index;
126✔
568
            return *this;
126✔
569
        }
570
    };
571

572
    static std::unordered_map<const intern_string_t, logline_value_meta>
573
        FIELD_META;
574

575
    static const intern_string_t get_opid_desc()
757✔
576
    {
577
        static const intern_string_t RETVAL = intern_string::lookup("std");
2,271✔
578

579
        return RETVAL;
757✔
580
    }
581

582
    bro_log_format()
757✔
583
    {
757✔
584
        this->lf_structured = true;
757✔
585
        this->lf_is_self_describing = true;
757✔
586
        this->lf_time_ordered = false;
757✔
587
        this->lf_timestamp_point_of_reference
588
            = timestamp_point_of_reference_t::start;
757✔
589

590
        auto desc_v = std::make_shared<std::vector<opid_descriptor>>();
757✔
591
        desc_v->emplace({});
757✔
592
        auto emplace_res = this->lf_opid_description_def->emplace(
1,514✔
593
            get_opid_desc(), opid_descriptors{desc_v, 0});
1,514✔
594
        this->lf_opid_description_def_vec->emplace_back(
757✔
595
            &emplace_res.first->second);
757✔
596
    }
757✔
597

598
    const intern_string_t get_name() const override
116,641✔
599
    {
600
        static const intern_string_t name(intern_string::lookup("bro"));
118,143✔
601

602
        return this->blf_format_name.empty() ? name : this->blf_format_name;
116,641✔
603
    }
604

605
    void clear() override
12,395✔
606
    {
607
        this->log_format::clear();
12,395✔
608
        this->blf_format_name.clear();
12,395✔
609
        this->blf_field_defs.clear();
12,395✔
610
    }
12,395✔
611

612
    std::vector<logline_value_meta> get_value_metadata() const override
×
613
    {
614
        std::vector<logline_value_meta> retval;
×
615

616
        for (const auto& fd : this->blf_field_defs) {
×
617
            retval.emplace_back(fd.fd_meta);
×
618
        }
619
        return retval;
×
620
    }
×
621

622
    scan_result_t scan_int(std::vector<logline>& dst,
4,857✔
623
                           const line_info& li,
624
                           shared_buffer_ref& sbr,
625
                           scan_batch_context& sbc)
626
    {
627
        static const intern_string_t STATUS_CODE
628
            = intern_string::lookup("bro_status_code");
4,903✔
629
        static const intern_string_t UID = intern_string::lookup("bro_uid");
4,903✔
630
        static const intern_string_t ID_ORIG_H
631
            = intern_string::lookup("bro_id_orig_h");
4,903✔
632

633
        separated_string ss(sbr.get_data(), sbr.length());
4,857✔
634
        timeval tv;
635
        exttm tm;
4,857✔
636
        size_t found_ts = 0;
4,857✔
637
        log_level_t level = LEVEL_INFO;
4,857✔
638
        uint64_t opid_bloom = 0;
4,857✔
639
        auto opid_cap = string_fragment::invalid();
4,857✔
640
        auto host_cap = string_fragment::invalid();
4,857✔
641
        auto duration = std::chrono::microseconds{0};
4,857✔
642

643
        sbc.sbc_value_stats.resize(this->blf_field_defs.size());
4,857✔
644
        ss.with_separator(this->blf_separator.get());
4,857✔
645

646
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
143,439✔
647
            if (iter.index() == 0 && *iter == "#close") {
138,608✔
648
                return scan_match{2000};
26✔
649
            }
650

651
            if (iter.index() >= this->blf_field_defs.size()) {
138,582✔
652
                break;
×
653
            }
654

655
            const auto& fd = this->blf_field_defs[iter.index()];
138,582✔
656

657
            if (TS == fd.fd_meta.lvm_name) {
138,582✔
658
                static const char* const TIME_FMT[] = {"%s.%f"};
659
                const auto sf = *iter;
4,830✔
660

661
                if (this->lf_date_time.scan(
4,830✔
662
                        sf.data(), sf.length(), TIME_FMT, &tm, tv))
4,830✔
663
                {
664
                    this->lf_timestamp_flags = tm.et_flags;
4,830✔
665
                    found_ts += 1;
4,830✔
666
                }
667
            } else if (STATUS_CODE == fd.fd_meta.lvm_name) {
133,752✔
668
                const auto sf = *iter;
4,644✔
669

670
                if (!sf.empty() && sf[0] >= '4') {
4,644✔
671
                    level = LEVEL_ERROR;
23✔
672
                }
673
            } else if (UID == fd.fd_meta.lvm_name) {
129,108✔
674
                opid_cap = *iter;
4,830✔
675

676
                opid_bloom = opid_cap.bloom_bits();
4,830✔
677
            } else if (ID_ORIG_H == fd.fd_meta.lvm_name) {
124,278✔
678
                host_cap = *iter;
4,830✔
679
            } else if (DURATION == fd.fd_meta.lvm_name) {
119,448✔
680
                const auto sf = *iter;
186✔
681
                auto scan_res = scn::scan<double>("{}", sf.to_string_view());
186✔
682
                if (scan_res) {
186✔
683
                    duration = std::chrono::microseconds{
×
684
                        static_cast<long long>(scan_res->value() * 1000000)};
685
                }
686
            }
687

688
            if (fd.fd_numeric_index) {
138,582✔
689
                switch (fd.fd_meta.lvm_kind) {
24,708✔
690
                    case value_kind_t::VALUE_INTEGER:
24,708✔
691
                    case value_kind_t::VALUE_FLOAT: {
692
                        const auto sv = (*iter).to_string_view();
24,708✔
693
                        auto scan_float_res = scn::scan_value<double>(sv);
24,708✔
694
                        if (scan_float_res) {
24,708✔
695
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
20,064✔
696
                                .add_value(scan_float_res->value());
20,064✔
697
                        }
698
                        break;
24,708✔
699
                    }
700
                    default:
×
701
                        break;
×
702
                }
703
            }
704
        }
705

706
        if (found_ts == 1) {
4,831✔
707
            if (!this->lf_specialized) {
4,830✔
708
                for (auto& ll : dst) {
216✔
709
                    ll.set_ignore(true);
192✔
710
                }
711
            }
712

713
            auto log_us = to_us(tv);
4,830✔
714
            if (opid_cap.is_valid()) {
4,830✔
715
                auto opid_iter = sbc.sbc_opids.insert_op(
4,830✔
716
                    sbc.sbc_allocator,
717
                    opid_cap,
718
                    log_us,
719
                    this->lf_timestamp_point_of_reference,
720
                    duration);
721
                opid_iter->second.otr_level_stats.update_msg_count(level);
4,830✔
722

723
                auto& otr = opid_iter->second;
4,830✔
724
                if (!otr.otr_description.lod_index && host_cap.is_valid()
7,039✔
725
                    && otr.otr_description.lod_elements.empty())
7,039✔
726
                {
727
                    otr.otr_description.lod_index = 0;
2,209✔
728
                    otr.otr_description.lod_elements.insert(
4,418✔
729
                        0, host_cap.to_string());
2,209✔
730
                }
731
            }
732
            dst.emplace_back(li.li_file_range.fr_offset, log_us, level);
4,830✔
733
            dst.back().merge_bloom_bits(opid_bloom);
4,830✔
734
            return scan_match{2000};
4,830✔
735
        }
736
        return scan_no_match{"no header found"};
1✔
737
    }
738

739
    scan_result_t scan(logfile& lf,
12,465✔
740
                       std::vector<logline>& dst,
741
                       const line_info& li,
742
                       shared_buffer_ref& sbr,
743
                       scan_batch_context& sbc) override
744
    {
745
        static const auto SEP_RE
746
            = lnav::pcre2pp::code::from_const(R"(^#separator\s+(.+))");
12,465✔
747

748
        if (dst.empty()) {
12,465✔
749
            auto file_options = lf.get_file_options();
1,244✔
750

751
            if (file_options) {
1,244✔
752
                this->lf_date_time.dts_default_zone
753
                    = file_options->second.fo_default_zone.pp_value;
57✔
754
            } else {
755
                this->lf_date_time.dts_default_zone = nullptr;
1,187✔
756
            }
757
        }
1,244✔
758

759
        if (!this->blf_format_name.empty()) {
12,465✔
760
            return this->scan_int(dst, li, sbr, sbc);
4,833✔
761
        }
762

763
        if (dst.size() <= 2 || dst.size() > 20 || sbr.empty()
13,050✔
764
            || sbr.get_data()[0] == '#')
13,050✔
765
        {
766
            return scan_no_match{"no header found"};
5,977✔
767
        }
768

769
        auto line_iter = dst.begin();
1,655✔
770
        auto read_result = lf.read_line(line_iter);
1,655✔
771

772
        if (read_result.isErr()) {
1,655✔
773
            return scan_no_match{"unable to read first line"};
×
774
        }
775

776
        auto line = read_result.unwrap();
1,655✔
777
        auto md = SEP_RE.create_match_data();
1,655✔
778

779
        auto match_res = SEP_RE.capture_from(line.to_string_fragment())
1,655✔
780
                             .into(md)
1,655✔
781
                             .matches(PCRE2_NO_UTF_CHECK)
3,310✔
782
                             .ignore_error();
1,655✔
783
        if (!match_res) {
1,655✔
784
            return scan_no_match{"cannot read separator header"};
1,631✔
785
        }
786

787
        this->clear();
24✔
788

789
        auto sep = from_escaped_string(md[1]->data(), md[1]->length());
24✔
790
        this->blf_separator = intern_string::lookup(sep);
24✔
791

792
        for (++line_iter; line_iter != dst.end(); ++line_iter) {
192✔
793
            auto next_read_result = lf.read_line(line_iter);
168✔
794

795
            if (next_read_result.isErr()) {
168✔
796
                return scan_no_match{"unable to read header line"};
×
797
            }
798

799
            line = next_read_result.unwrap();
168✔
800
            separated_string ss(line.get_data(), line.length());
168✔
801

802
            ss.with_separator(this->blf_separator.get());
168✔
803
            auto iter = ss.begin();
168✔
804

805
            string_fragment directive = *iter;
168✔
806

807
            if (directive.empty() || directive[0] != '#') {
168✔
808
                continue;
×
809
            }
810

811
            ++iter;
168✔
812
            if (iter == ss.end()) {
168✔
813
                continue;
×
814
            }
815

816
            if (directive == "#set_separator") {
168✔
817
                this->blf_set_separator = intern_string::lookup(*iter);
24✔
818
            } else if (directive == "#empty_field") {
144✔
819
                this->blf_empty_field = intern_string::lookup(*iter);
24✔
820
            } else if (directive == "#unset_field") {
120✔
821
                this->blf_unset_field = intern_string::lookup(*iter);
24✔
822
            } else if (directive == "#path") {
96✔
823
                auto full_name = fmt::format(FMT_STRING("bro_{}_log"), *iter);
72✔
824
                this->blf_format_name = intern_string::lookup(full_name);
24✔
825
            } else if (directive == "#fields" && this->blf_field_defs.empty()) {
96✔
826
                do {
827
                    auto field_name
828
                        = intern_string::lookup("bro_" + sql_safe_ident(*iter));
680✔
829
                    auto common_iter = FIELD_META.find(field_name);
680✔
830
                    if (common_iter == FIELD_META.end()) {
680✔
831
                        FIELD_META.emplace(field_name,
674✔
832
                                           logline_value_meta{
1,348✔
833
                                               field_name,
834
                                               value_kind_t::VALUE_TEXT,
835
                                           });
836
                    }
837
                    this->blf_field_defs.emplace_back(
1,360✔
838
                        field_name, this->blf_field_defs.size(), this);
680✔
839
                    ++iter;
680✔
840
                } while (iter != ss.end());
680✔
841
            } else if (directive == "#types") {
48✔
842
                static const char* KNOWN_IDS[] = {
843
                    "bro_conn_uids",
844
                    "bro_fuid",
845
                    "bro_host",
846
                    "bro_info_code",
847
                    "bro_method",
848
                    "bro_mime_type",
849
                    "bro_orig_fuids",
850
                    "bro_parent_fuid",
851
                    "bro_proto",
852
                    "bro_referrer",
853
                    "bro_resp_fuids",
854
                    "bro_service",
855
                    "bro_uid",
856
                    "bro_uri",
857
                    "bro_user_agent",
858
                    "bro_username",
859
                };
860
                static const char* KNOWN_FOREIGN[] = {
861
                    "bro_status_code",
862
                };
863

864
                int numeric_count = 0;
24✔
865

866
                do {
867
                    string_fragment field_type = *iter;
680✔
868
                    auto& fd = this->blf_field_defs[iter.index() - 1];
680✔
869

870
                    if (field_type == "time") {
680✔
871
                        fd.with_kind(value_kind_t::VALUE_TIMESTAMP);
48✔
872
                    } else if (field_type == "string") {
656✔
873
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
500✔
874
                                                        std::end(KNOWN_IDS),
875
                                                        fd.fd_meta.lvm_name);
250✔
876
                        fd.with_kind(value_kind_t::VALUE_TEXT, ident);
500✔
877
                    } else if (field_type == "count") {
406✔
878
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
248✔
879
                                                        std::end(KNOWN_IDS),
880
                                                        fd.fd_meta.lvm_name);
124✔
881
                        bool foreign
882
                            = std::binary_search(std::begin(KNOWN_FOREIGN),
248✔
883
                                                 std::end(KNOWN_FOREIGN),
884
                                                 fd.fd_meta.lvm_name);
124✔
885
                        fd.with_kind(
248✔
886
                              value_kind_t::VALUE_INTEGER, ident, foreign)
887
                            .with_numeric_index(numeric_count);
124✔
888
                        numeric_count += 1;
124✔
889
                    } else if (field_type == "bool") {
282✔
890
                        fd.with_kind(value_kind_t::VALUE_BOOLEAN);
8✔
891
                    } else if (field_type == "addr") {
278✔
892
                        fd.with_kind(
96✔
893
                            value_kind_t::VALUE_TEXT, true, false, "ipaddress");
894
                    } else if (field_type == "port") {
230✔
895
                        fd.with_kind(value_kind_t::VALUE_INTEGER, true);
96✔
896
                    } else if (field_type == "interval") {
182✔
897
                        fd.with_kind(value_kind_t::VALUE_FLOAT)
4✔
898
                            .with_numeric_index(numeric_count);
2✔
899
                        numeric_count += 1;
2✔
900
                    }
901

902
                    ++iter;
680✔
903
                } while (iter != ss.end());
680✔
904
            }
905
        }
168✔
906

907
        if (!this->blf_format_name.empty() && !this->blf_separator.empty()
48✔
908
            && !this->blf_field_defs.empty())
48✔
909
        {
910
            return this->scan_int(dst, li, sbr, sbc);
24✔
911
        }
912

913
        this->blf_format_name.clear();
×
914

915
        return scan_no_match{"no header found"};
×
916
    }
1,655✔
917

918
    void annotate(logfile* lf,
30,358✔
919
                  uint64_t line_number,
920
                  string_attrs_t& sa,
921
                  logline_value_vector& values) const override
922
    {
923
        static const intern_string_t UID = intern_string::lookup("bro_uid");
30,398✔
924

925
        auto& sbr = values.lvv_sbr;
30,358✔
926
        separated_string ss(sbr.get_data(), sbr.length());
30,358✔
927

928
        ss.with_separator(this->blf_separator.get());
30,358✔
929

930
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
681,188✔
931
            if (iter.index() >= this->blf_field_defs.size()) {
651,037✔
932
                return;
207✔
933
            }
934

935
            const field_def& fd = this->blf_field_defs[iter.index()];
650,830✔
936
            string_fragment sf = *iter;
650,830✔
937

938
            if (sf == this->blf_empty_field) {
650,830✔
939
                sf.clear();
30,161✔
940
            } else if (sf == this->blf_unset_field) {
620,669✔
941
                sf.invalidate();
74,315✔
942
            }
943

944
            auto lr = line_range(sf.sf_begin, sf.sf_end);
650,830✔
945

946
            if (fd.fd_meta.lvm_name == TS) {
650,830✔
947
                sa.emplace_back(lr, L_TIMESTAMP.value());
30,358✔
948
            } else if (fd.fd_meta.lvm_name == UID) {
620,472✔
949
                sa.emplace_back(lr, L_OPID.value());
30,358✔
950
                values.lvv_opid_value = sf.to_string();
30,358✔
951
                values.lvv_opid_provenance
952
                    = logline_value_vector::opid_provenance::file;
30,358✔
953
            }
954

955
            if (lr.is_valid()) {
650,830✔
956
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
576,515✔
957
            } else {
958
                values.lvv_values.emplace_back(fd.fd_meta);
74,315✔
959
            }
960
            values.lvv_values.back().lv_meta.lvm_user_hidden
650,830✔
961
                = fd.fd_root_meta->lvm_user_hidden;
650,830✔
962
        }
963

964
        log_format::annotate(lf, line_number, sa, values);
30,151✔
965
    }
966

967
    std::optional<size_t> stats_index_for_value(
36✔
968
        const intern_string_t& name) const override
969
    {
970
        for (const auto& blf_field_def : this->blf_field_defs) {
540✔
971
            if (blf_field_def.fd_meta.lvm_name == name) {
540✔
972
                if (!blf_field_def.fd_numeric_index) {
36✔
973
                    break;
×
974
                }
975
                return blf_field_def.fd_numeric_index.value();
36✔
976
            }
977
        }
978

979
        return std::nullopt;
×
980
    }
981

982
    /**
     * Set the user-hidden flag on a field in the shared FIELD_META table.
     *
     * LOG_TIME_STR is treated as an alias for the "bro_ts" field.
     *
     * @return true if the field exists in FIELD_META, false otherwise.
     */
    bool hide_field(intern_string_t field_name, bool val) override
    {
        if (field_name == LOG_TIME_STR) {
            field_name = TS;
        }

        if (auto meta_iter = FIELD_META.find(field_name);
            meta_iter != FIELD_META.end())
        {
            meta_iter->second.lvm_user_hidden = val;
            return true;
        }

        return false;
    }
997

998
    std::map<intern_string_t, logline_value_meta> get_field_states() override
53✔
999
    {
1000
        std::map<intern_string_t, logline_value_meta> retval;
53✔
1001

1002
        for (const auto& fd : FIELD_META) {
169✔
1003
            retval.emplace(fd.first, fd.second);
116✔
1004
        }
1005

1006
        return retval;
53✔
1007
    }
×
1008

1009
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
24✔
1010
    {
1011
        auto retval = std::make_shared<bro_log_format>(*this);
24✔
1012

1013
        retval->lf_specialized = true;
24✔
1014
        return retval;
48✔
1015
    }
24✔
1016

1017
    class bro_log_table : public log_format_vtab_impl {
1018
    public:
1019
        explicit bro_log_table(std::shared_ptr<const log_format> format)
22✔
1020
            : log_format_vtab_impl(format),
22✔
1021
              blt_format(dynamic_cast<const bro_log_format*>(format.get()))
22✔
1022
        {
1023
        }
22✔
1024

1025
        void get_columns(std::vector<vtab_column>& cols) const override
31✔
1026
        {
1027
            for (const auto& fd : this->blt_format->blf_field_defs) {
914✔
1028
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
883✔
1029
                    fd.fd_meta.lvm_kind);
883✔
1030

1031
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
883✔
1032
                                  type_pair.first,
1033
                                  fd.fd_collator,
883✔
1034
                                  false,
1,766✔
1035
                                  "",
1036
                                  type_pair.second);
1037
            }
1038
        }
31✔
1039

1040
        void get_foreign_keys(
11✔
1041
            std::unordered_set<std::string>& keys_inout) const override
1042
        {
1043
            this->log_vtab_impl::get_foreign_keys(keys_inout);
11✔
1044

1045
            for (const auto& fd : this->blt_format->blf_field_defs) {
322✔
1046
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
311✔
1047
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
136✔
1048
                }
1049
            }
1050
        }
11✔
1051

1052
        const bro_log_format* blt_format;
1053
    };
1054

1055
    static std::map<intern_string_t, std::shared_ptr<bro_log_table>>&
1056
    get_tables()
22✔
1057
    {
1058
        static std::map<intern_string_t, std::shared_ptr<bro_log_table>> retval;
22✔
1059

1060
        return retval;
22✔
1061
    }
1062

1063
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
659✔
1064
    {
1065
        if (this->blf_format_name.empty()) {
659✔
1066
            return nullptr;
637✔
1067
        }
1068

1069
        std::shared_ptr<bro_log_table> retval = nullptr;
22✔
1070

1071
        auto& tables = get_tables();
22✔
1072
        const auto iter = tables.find(this->blf_format_name);
22✔
1073
        if (iter == tables.end()) {
22✔
1074
            retval = std::make_shared<bro_log_table>(this->shared_from_this());
22✔
1075
            tables[this->blf_format_name] = retval;
22✔
1076
        }
1077

1078
        return retval;
22✔
1079
    }
22✔
1080

1081
    void get_subline(const log_format_file_state& lffs,
34,939✔
1082
                     const logline& ll,
1083
                     shared_buffer_ref& sbr,
1084
                     subline_options opts) override
1085
    {
1086
    }
34,939✔
1087

1088
    intern_string_t blf_format_name;
1089
    intern_string_t blf_separator;
1090
    intern_string_t blf_set_separator;
1091
    intern_string_t blf_empty_field;
1092
    intern_string_t blf_unset_field;
1093
    std::vector<field_def> blf_field_defs;
1094
};
1095

1096
std::unordered_map<const intern_string_t, logline_value_meta>
1097
    bro_log_format::FIELD_META;
1098

1099
const intern_string_t bro_log_format::TS = intern_string::lookup("bro_ts");
1100
const intern_string_t bro_log_format::DURATION
1101
    = intern_string::lookup("bro_duration");
1102

1103
struct ws_separated_string {
1104
    const char* ss_str;
1105
    size_t ss_len;
1106

1107
    explicit ws_separated_string(const char* str = nullptr, size_t len = -1)
21,590✔
1108
        : ss_str(str), ss_len(len)
21,590✔
1109
    {
1110
    }
21,590✔
1111

1112
    struct iterator {
1113
        enum class state_t {
1114
            NORMAL,
1115
            QUOTED,
1116
        };
1117

1118
        const ws_separated_string& i_parent;
1119
        const char* i_pos;
1120
        const char* i_next_pos;
1121
        size_t i_index{0};
1122
        state_t i_state{state_t::NORMAL};
1123

1124
        iterator(const ws_separated_string& ss, const char* pos)
70,401✔
1125
            : i_parent(ss), i_pos(pos), i_next_pos(pos)
70,401✔
1126
        {
1127
            this->update();
70,401✔
1128
        }
70,401✔
1129

1130
        void update()
116,439✔
1131
        {
1132
            const auto& ss = this->i_parent;
116,439✔
1133
            bool done = false;
116,439✔
1134

1135
            while (!done && this->i_next_pos < (ss.ss_str + ss.ss_len)) {
844,449✔
1136
                switch (this->i_state) {
728,010✔
1137
                    case state_t::NORMAL:
721,541✔
1138
                        if (*this->i_next_pos == '"') {
721,541✔
1139
                            this->i_state = state_t::QUOTED;
237✔
1140
                        } else if (isspace(*this->i_next_pos)) {
721,304✔
1141
                            done = true;
59,035✔
1142
                        }
1143
                        break;
721,541✔
1144
                    case state_t::QUOTED:
6,469✔
1145
                        if (*this->i_next_pos == '"') {
6,469✔
1146
                            this->i_state = state_t::NORMAL;
237✔
1147
                        }
1148
                        break;
6,469✔
1149
                }
1150
                if (!done) {
728,010✔
1151
                    this->i_next_pos += 1;
668,975✔
1152
                }
1153
            }
1154
        }
116,439✔
1155

1156
        iterator& operator++()
46,038✔
1157
        {
1158
            const auto& ss = this->i_parent;
46,038✔
1159

1160
            this->i_pos = this->i_next_pos;
46,038✔
1161
            while (this->i_pos < (ss.ss_str + ss.ss_len)
46,038✔
1162
                   && isspace(*this->i_pos))
89,470✔
1163
            {
1164
                this->i_pos += 1;
43,432✔
1165
                this->i_next_pos += 1;
43,432✔
1166
            }
1167
            this->update();
46,038✔
1168
            this->i_index += 1;
46,038✔
1169

1170
            return *this;
46,038✔
1171
        }
1172

1173
        string_fragment operator*()
62,282✔
1174
        {
1175
            const auto& ss = this->i_parent;
62,282✔
1176
            int end = this->i_next_pos - ss.ss_str;
62,282✔
1177

1178
            return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end);
62,282✔
1179
        }
1180

1181
        bool operator==(const iterator& other) const
48,811✔
1182
        {
1183
            return (&this->i_parent == &other.i_parent)
48,811✔
1184
                && (this->i_pos == other.i_pos);
48,811✔
1185
        }
1186

1187
        bool operator!=(const iterator& other) const
46,030✔
1188
        {
1189
            return !(*this == other);
46,030✔
1190
        }
1191

1192
        size_t index() const { return this->i_index; }
86,530✔
1193
    };
1194

1195
    iterator begin() { return {*this, this->ss_str}; }
21,590✔
1196

1197
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
48,811✔
1198
};
1199

1200
class w3c_log_format : public log_format {
1201
public:
1202
    static const intern_string_t F_DATE;
1203
    static const intern_string_t F_TIME;
1204

1205
    struct field_def {
1206
        const intern_string_t fd_name;
1207
        logline_value_meta fd_meta;
1208
        logline_value_meta* fd_root_meta{nullptr};
1209
        std::string fd_collator;
1210
        std::optional<size_t> fd_numeric_index;
1211

1212
        explicit field_def(const intern_string_t name)
18✔
1213
            : fd_name(name), fd_meta(intern_string::lookup(sql_safe_ident(
36✔
1214
                                         name.to_string_fragment())),
36✔
1215
                                     value_kind_t::VALUE_TEXT)
18✔
1216
        {
1217
        }
18✔
1218

1219
        field_def(const intern_string_t name, logline_value_meta meta)
66✔
1220
            : fd_name(name), fd_meta(meta)
66✔
1221
        {
1222
        }
66✔
1223

1224
        field_def(size_t col,
9,856✔
1225
                  const char* name,
1226
                  value_kind_t kind,
1227
                  bool ident = false,
1228
                  bool foreign_key = false,
1229
                  std::string coll = "")
1230
            : fd_name(intern_string::lookup(name)),
19,712✔
1231
              fd_meta(
19,712✔
1232
                  intern_string::lookup(sql_safe_ident(string_fragment(name))),
19,712✔
1233
                  kind,
1234
                  logline_value_meta::table_column{col}),
9,856✔
1235
              fd_collator(std::move(coll))
9,856✔
1236
        {
1237
            this->fd_meta.lvm_identifier = ident;
9,856✔
1238
            this->fd_meta.lvm_foreign_key = foreign_key;
9,856✔
1239
        }
9,856✔
1240

1241
        field_def& with_kind(value_kind_t kind,
1242
                             bool identifier = false,
1243
                             const std::string& collator = "")
1244
        {
1245
            this->fd_meta.lvm_kind = kind;
1246
            this->fd_meta.lvm_identifier = identifier;
1247
            this->fd_collator = collator;
1248
            return *this;
1249
        }
1250

1251
        field_def& with_numeric_index(int index)
50✔
1252
        {
1253
            this->fd_numeric_index = index;
50✔
1254
            return *this;
50✔
1255
        }
1256
    };
1257

1258
    static std::unordered_map<const intern_string_t, logline_value_meta>
1259
        FIELD_META;
1260

1261
    struct field_to_struct_t {
1262
        field_to_struct_t(const char* prefix, const char* struct_name)
2,464✔
1263
            : fs_prefix(prefix),
2,464✔
1264
              fs_struct_name(intern_string::lookup(struct_name))
4,928✔
1265
        {
1266
        }
2,464✔
1267

1268
        const char* fs_prefix;
1269
        intern_string_t fs_struct_name;
1270
    };
1271

1272
    static const std::array<field_def, 16>& get_known_fields()
631✔
1273
    {
1274
        static size_t KNOWN_FIELD_INDEX = 0;
1275
        static const std::array<field_def, 16> RETVAL = {
1276
            field_def{
1277
                KNOWN_FIELD_INDEX++,
1278
                "cs-method",
1279
                value_kind_t::VALUE_TEXT,
1280
                true,
1281
            },
1282
            {
1283
                KNOWN_FIELD_INDEX++,
1284
                "c-ip",
1285
                value_kind_t::VALUE_TEXT,
1286
                true,
1287
                false,
1288
                "ipaddress",
1289
            },
1290
            {
1291
                KNOWN_FIELD_INDEX++,
1292
                "cs-bytes",
1293
                value_kind_t::VALUE_INTEGER,
1294
                false,
1295
            },
1296
            {
1297
                KNOWN_FIELD_INDEX++,
1298
                "cs-host",
1299
                value_kind_t::VALUE_TEXT,
1300
                true,
1301
            },
1302
            {
1303
                KNOWN_FIELD_INDEX++,
1304
                "cs-uri-stem",
1305
                value_kind_t::VALUE_TEXT,
1306
                true,
1307
                false,
1308
                "naturalnocase",
1309
            },
1310
            {
1311
                KNOWN_FIELD_INDEX++,
1312
                "cs-uri-query",
1313
                value_kind_t::VALUE_TEXT,
1314
                false,
1315
            },
1316
            {
1317
                KNOWN_FIELD_INDEX++,
1318
                "cs-username",
1319
                value_kind_t::VALUE_TEXT,
1320
                false,
1321
            },
1322
            {
1323
                KNOWN_FIELD_INDEX++,
1324
                "cs-version",
1325
                value_kind_t::VALUE_TEXT,
1326
                true,
1327
            },
1328
            {
1329
                KNOWN_FIELD_INDEX++,
1330
                "s-ip",
1331
                value_kind_t::VALUE_TEXT,
1332
                true,
1333
                false,
1334
                "ipaddress",
1335
            },
1336
            {
1337
                KNOWN_FIELD_INDEX++,
1338
                "s-port",
1339
                value_kind_t::VALUE_INTEGER,
1340
                true,
1341
            },
1342
            {
1343
                KNOWN_FIELD_INDEX++,
1344
                "s-computername",
1345
                value_kind_t::VALUE_TEXT,
1346
                true,
1347
            },
1348
            {
1349
                KNOWN_FIELD_INDEX++,
1350
                "s-sitename",
1351
                value_kind_t::VALUE_TEXT,
1352
                true,
1353
            },
1354
            {
1355
                KNOWN_FIELD_INDEX++,
1356
                "sc-bytes",
1357
                value_kind_t::VALUE_INTEGER,
1358
                false,
1359
            },
1360
            {
1361
                KNOWN_FIELD_INDEX++,
1362
                "sc-status",
1363
                value_kind_t::VALUE_INTEGER,
1364
                false,
1365
                true,
1366
            },
1367
            {
1368
                KNOWN_FIELD_INDEX++,
1369
                "sc-substatus",
1370
                value_kind_t::VALUE_INTEGER,
1371
                false,
1372
            },
1373
            {
1374
                KNOWN_FIELD_INDEX++,
1375
                "time-taken",
1376
                value_kind_t::VALUE_FLOAT,
1377
                false,
1378
            },
1379
        };
1,863✔
1380

1381
        return RETVAL;
631✔
1382
    }
1383

1384
    static const std::array<field_to_struct_t, 4>& get_known_struct_fields()
628✔
1385
    {
1386
        static const std::array<field_to_struct_t, 4> RETVAL = {
1387
            field_to_struct_t{"cs(", "cs_headers"},
1388
            {"sc(", "sc_headers"},
1389
            {"rs(", "rs_headers"},
1390
            {"sr(", "sr_headers"},
1391
        };
628✔
1392

1393
        return RETVAL;
628✔
1394
    }
1395

1396
    w3c_log_format()
757✔
1397
    {
757✔
1398
        this->lf_is_self_describing = true;
757✔
1399
        this->lf_time_ordered = false;
757✔
1400
        this->lf_structured = true;
757✔
1401
    }
757✔
1402

1403
    /**
     * @return wlf_format_name when it is set, otherwise the generic
     *         "w3c_log" name.
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t name(intern_string::lookup("w3c_log"));

        if (this->wlf_format_name.empty()) {
            return name;
        }

        return this->wlf_format_name;
    }
1409

1410
    void clear() override
14,247✔
1411
    {
1412
        this->log_format::clear();
14,247✔
1413
        this->wlf_time_scanner.clear();
14,247✔
1414
        this->wlf_format_name.clear();
14,247✔
1415
        this->wlf_field_defs.clear();
14,247✔
1416
    }
14,247✔
1417

1418
    std::vector<logline_value_meta> get_value_metadata() const override
×
1419
    {
1420
        std::vector<logline_value_meta> retval;
×
1421

1422
        for (const auto& fd : this->wlf_field_defs) {
×
1423
            retval.emplace_back(fd.fd_meta);
×
1424
        }
1425
        return retval;
×
1426
    }
×
1427

1428
    scan_result_t scan_int(std::vector<logline>& dst,
1,309✔
1429
                           const line_info& li,
1430
                           shared_buffer_ref& sbr,
1431
                           scan_batch_context& sbc)
1432
    {
1433
        static const intern_string_t F_DATE_LOCAL
1434
            = intern_string::lookup("date-local");
1,337✔
1435
        static const intern_string_t F_DATE_UTC
1436
            = intern_string::lookup("date-UTC");
1,337✔
1437
        static const intern_string_t F_TIME_LOCAL
1438
            = intern_string::lookup("time-local");
1,337✔
1439
        static const intern_string_t F_TIME_UTC
1440
            = intern_string::lookup("time-UTC");
1,337✔
1441
        static const intern_string_t F_STATUS_CODE
1442
            = intern_string::lookup("sc-status");
1,337✔
1443

1444
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,309✔
1445
        timeval date_tv{0, 0}, time_tv{0, 0};
1,309✔
1446
        exttm date_tm, time_tm;
1,309✔
1447
        size_t found_date = 0;
1,309✔
1448
        size_t found_time = 0;
1,309✔
1449
        log_level_t level = LEVEL_INFO;
1,309✔
1450

1451
        sbc.sbc_value_stats.resize(this->wlf_field_defs.size());
1,309✔
1452
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
19,676✔
1453
            if (iter.index() >= this->wlf_field_defs.size()) {
18,575✔
1454
                level = LEVEL_INVALID;
×
1455
                break;
×
1456
            }
1457

1458
            const auto& fd = this->wlf_field_defs[iter.index()];
18,575✔
1459
            string_fragment sf = *iter;
18,575✔
1460

1461
            if (sf.startswith("#")) {
18,575✔
1462
                if (sf == "#Date:") {
208✔
1463
                    auto sbr_sf_opt
1464
                        = sbr.to_string_fragment().consume_n(sf.length());
52✔
1465

1466
                    if (sbr_sf_opt) {
52✔
1467
                        auto sbr_sf = sbr_sf_opt.value().trim();
52✔
1468
                        date_time_scanner dts;
52✔
1469
                        exttm tm;
52✔
1470
                        timeval tv;
1471

1472
                        if (dts.scan(sbr_sf.data(),
52✔
1473
                                     sbr_sf.length(),
52✔
1474
                                     nullptr,
1475
                                     &tm,
1476
                                     tv))
1477
                        {
1478
                            this->lf_date_time.set_base_time(tv.tv_sec,
52✔
1479
                                                             tm.et_tm);
1480
                            this->wlf_time_scanner.set_base_time(tv.tv_sec,
52✔
1481
                                                                 tm.et_tm);
1482
                        }
1483
                    }
1484
                }
1485
                dst.emplace_back(li.li_file_range.fr_offset,
208✔
1486
                                 std::chrono::microseconds{0},
×
1487
                                 LEVEL_UNKNOWN);
208✔
1488
                dst.back().set_ignore(true);
208✔
1489
                return scan_match{2000};
208✔
1490
            }
1491

1492
            sf = sf.trim("\" \t");
18,367✔
1493
            if (F_DATE == fd.fd_name || F_DATE_LOCAL == fd.fd_name
35,674✔
1494
                || F_DATE_UTC == fd.fd_name)
35,674✔
1495
            {
1496
                if (this->lf_date_time.scan(
1,068✔
1497
                        sf.data(), sf.length(), nullptr, &date_tm, date_tv))
1,068✔
1498
                {
1499
                    this->lf_timestamp_flags |= date_tm.et_flags;
1,068✔
1500
                    found_date += 1;
1,068✔
1501
                }
1502
            } else if (F_TIME == fd.fd_name || F_TIME_LOCAL == fd.fd_name
33,508✔
1503
                       || F_TIME_UTC == fd.fd_name)
33,508✔
1504
            {
1505
                if (this->wlf_time_scanner.scan(
1,098✔
1506
                        sf.data(), sf.length(), nullptr, &time_tm, time_tv))
1,098✔
1507
                {
1508
                    this->lf_timestamp_flags |= time_tm.et_flags;
1,098✔
1509
                    found_time += 1;
1,098✔
1510
                }
1511
            } else if (F_STATUS_CODE == fd.fd_name) {
16,201✔
1512
                if (!sf.empty() && sf[0] >= '4') {
1,098✔
1513
                    level = LEVEL_ERROR;
1,018✔
1514
                }
1515
            }
1516

1517
            if (fd.fd_numeric_index) {
18,367✔
1518
                switch (fd.fd_meta.lvm_kind) {
6,401✔
1519
                    case value_kind_t::VALUE_INTEGER:
6,401✔
1520
                    case value_kind_t::VALUE_FLOAT: {
1521
                        auto scan_float_res
1522
                            = scn::scan_value<double>(sf.to_string_view());
6,401✔
1523

1524
                        if (scan_float_res) {
6,401✔
1525
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
6,397✔
1526
                                .add_value(scan_float_res->value());
6,397✔
1527
                        }
1528
                        break;
6,401✔
1529
                    }
1530
                    default:
×
1531
                        break;
×
1532
                }
1533
            }
1534
        }
1535

1536
        if (found_time == 1 && found_date <= 1) {
1,101✔
1537
            auto tm = time_tm;
1,098✔
1538

1539
            if (found_date) {
1,098✔
1540
                tm.et_tm.tm_year = date_tm.et_tm.tm_year;
1,068✔
1541
                tm.et_tm.tm_mday = date_tm.et_tm.tm_mday;
1,068✔
1542
                tm.et_tm.tm_mon = date_tm.et_tm.tm_mon;
1,068✔
1543
                tm.et_tm.tm_wday = date_tm.et_tm.tm_wday;
1,068✔
1544
                tm.et_tm.tm_yday = date_tm.et_tm.tm_yday;
1,068✔
1545
            }
1546

1547
            auto tv = tm.to_timeval();
1,098✔
1548
            if (!this->lf_specialized) {
1,098✔
1549
                for (auto& ll : dst) {
66✔
1550
                    ll.set_ignore(true);
54✔
1551
                }
1552
            }
1553
            dst.emplace_back(li.li_file_range.fr_offset, to_us(tv), level);
1,098✔
1554
            return scan_match{2000};
1,098✔
1555
        }
1556

1557
        return scan_no_match{"no header found"};
3✔
1558
    }
1559

1560
    scan_result_t scan(logfile& lf,
12,391✔
1561
                       std::vector<logline>& dst,
1562
                       const line_info& li,
1563
                       shared_buffer_ref& sbr,
1564
                       scan_batch_context& sbc) override
1565
    {
1566
        static const auto* W3C_LOG_NAME = intern_string::lookup("w3c_log");
13,623✔
1567
        static const auto* X_FIELDS_NAME = intern_string::lookup("x_fields");
13,623✔
1568
        static const auto& KNOWN_FIELDS = get_known_fields();
12,391✔
1569
        static const auto& KNOWN_STRUCT_FIELDS = get_known_struct_fields();
12,391✔
1570
        static auto X_FIELDS_IDX = 0;
1571

1572
        if (li.li_partial) {
12,391✔
1573
            return scan_incomplete{};
19✔
1574
        }
1575

1576
        if (dst.empty()) {
12,372✔
1577
            auto file_options = lf.get_file_options();
1,147✔
1578

1579
            if (file_options) {
1,147✔
1580
                this->lf_date_time.dts_default_zone
1581
                    = file_options->second.fo_default_zone.pp_value;
57✔
1582
            } else {
1583
                this->lf_date_time.dts_default_zone = nullptr;
1,090✔
1584
            }
1585
        }
1,147✔
1586

1587
        if (!this->wlf_format_name.empty()) {
12,372✔
1588
            return this->scan_int(dst, li, sbr, sbc);
1,294✔
1589
        }
1590

1591
        if (dst.size() <= 2 || dst.size() > 20 || sbr.empty()
19,950✔
1592
            || sbr.get_data()[0] == '#')
19,950✔
1593
        {
1594
            return scan_no_match{"no header found"};
9,202✔
1595
        }
1596

1597
        this->clear();
1,876✔
1598

1599
        for (auto line_iter = dst.begin(); line_iter != dst.end(); ++line_iter)
20,693✔
1600
        {
1601
            auto next_read_result = lf.read_line(line_iter);
18,817✔
1602

1603
            if (next_read_result.isErr()) {
18,817✔
1604
                return scan_no_match{"unable to read first line"};
×
1605
            }
1606

1607
            auto line = next_read_result.unwrap();
18,817✔
1608
            ws_separated_string ss(line.get_data(), line.length());
18,817✔
1609
            auto iter = ss.begin();
18,817✔
1610
            const auto directive = *iter;
18,817✔
1611

1612
            if (directive.empty() || directive[0] != '#') {
18,817✔
1613
                continue;
16,036✔
1614
            }
1615

1616
            ++iter;
2,781✔
1617
            if (iter == ss.end()) {
2,781✔
1618
                continue;
38✔
1619
            }
1620

1621
            if (directive == "#Date:") {
2,743✔
1622
                date_time_scanner dts;
10✔
1623
                struct exttm tm;
10✔
1624
                struct timeval tv;
1625

1626
                if (dts.scan(line.get_data_at(directive.length() + 1),
10✔
1627
                             line.length() - directive.length() - 1,
10✔
1628
                             nullptr,
1629
                             &tm,
1630
                             tv))
1631
                {
1632
                    this->lf_date_time.set_base_time(tv.tv_sec, tm.et_tm);
10✔
1633
                    this->wlf_time_scanner.set_base_time(tv.tv_sec, tm.et_tm);
10✔
1634
                }
1635
            } else if (directive == "#Fields:" && this->wlf_field_defs.empty())
2,733✔
1636
            {
1637
                int numeric_count = 0;
15✔
1638

1639
                do {
1640
                    auto sf = (*iter).trim(")");
200✔
1641

1642
                    auto field_iter = std::find_if(
600✔
1643
                        begin(KNOWN_FIELDS),
1644
                        end(KNOWN_FIELDS),
1645
                        [&sf](auto elem) { return sf == elem.fd_name; });
2,253✔
1646
                    if (field_iter != end(KNOWN_FIELDS)) {
400✔
1647
                        this->wlf_field_defs.emplace_back(*field_iter);
116✔
1648
                        auto& fd = this->wlf_field_defs.back();
116✔
1649
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
116✔
1650
                        if (common_iter == FIELD_META.end()) {
116✔
1651
                            auto emp_res = FIELD_META.emplace(
116✔
1652
                                fd.fd_meta.lvm_name, fd.fd_meta);
116✔
1653
                            common_iter = emp_res.first;
116✔
1654
                        }
1655
                        fd.fd_root_meta = &common_iter->second;
116✔
1656
                    } else if (sf.is_one_of("date", "time")) {
84✔
1657
                        this->wlf_field_defs.emplace_back(
36✔
1658
                            intern_string::lookup(sf));
18✔
1659
                        auto& fd = this->wlf_field_defs.back();
18✔
1660
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
18✔
1661
                        if (common_iter == FIELD_META.end()) {
18✔
1662
                            auto emp_res = FIELD_META.emplace(
18✔
1663
                                fd.fd_meta.lvm_name, fd.fd_meta);
18✔
1664
                            common_iter = emp_res.first;
18✔
1665
                        }
1666
                        fd.fd_root_meta = &common_iter->second;
18✔
1667
                    } else {
1668
                        const auto fs_iter = std::find_if(
198✔
1669
                            begin(KNOWN_STRUCT_FIELDS),
1670
                            end(KNOWN_STRUCT_FIELDS),
1671
                            [&sf](auto elem) {
201✔
1672
                                return sf.startswith(elem.fs_prefix);
201✔
1673
                            });
1674
                        if (fs_iter != end(KNOWN_STRUCT_FIELDS)) {
132✔
1675
                            const intern_string_t field_name
1676
                                = intern_string::lookup(sf.substr(3));
21✔
1677
                            this->wlf_field_defs.emplace_back(
21✔
1678
                                field_name,
1679
                                logline_value_meta(
42✔
1680
                                    field_name,
1681
                                    value_kind_t::VALUE_TEXT,
1682
                                    logline_value_meta::table_column{
×
1683
                                        KNOWN_FIELDS.size() + 1
21✔
1684
                                        + std::distance(
63✔
1685
                                            begin(KNOWN_STRUCT_FIELDS),
1686
                                            fs_iter)},
1687
                                    this)
42✔
1688
                                    .with_struct_name(fs_iter->fs_struct_name));
1689
                        } else {
1690
                            const intern_string_t field_name
1691
                                = intern_string::lookup(sf);
45✔
1692
                            this->wlf_field_defs.emplace_back(
45✔
1693
                                field_name,
1694
                                logline_value_meta(
90✔
1695
                                    field_name,
1696
                                    value_kind_t::VALUE_TEXT,
1697
                                    logline_value_meta::table_column{
×
1698
                                        KNOWN_FIELDS.size() + X_FIELDS_IDX},
90✔
1699
                                    this)
90✔
1700
                                    .with_struct_name(X_FIELDS_NAME));
1701
                        }
1702
                    }
1703
                    auto& fd = this->wlf_field_defs.back();
200✔
1704
                    fd.fd_meta.lvm_format = std::make_optional(this);
200✔
1705
                    switch (fd.fd_meta.lvm_kind) {
200✔
1706
                        case value_kind_t::VALUE_FLOAT:
50✔
1707
                        case value_kind_t::VALUE_INTEGER:
1708
                            fd.with_numeric_index(numeric_count);
50✔
1709
                            numeric_count += 1;
50✔
1710
                            break;
50✔
1711
                        default:
150✔
1712
                            break;
150✔
1713
                    }
1714

1715
                    ++iter;
200✔
1716
                } while (iter != ss.end());
200✔
1717

1718
                this->wlf_format_name = W3C_LOG_NAME;
15✔
1719
            }
1720
        }
34,891✔
1721

1722
        if (!this->wlf_format_name.empty() && !this->wlf_field_defs.empty()) {
1,876✔
1723
            return this->scan_int(dst, li, sbr, sbc);
15✔
1724
        }
1725

1726
        this->wlf_format_name.clear();
1,861✔
1727

1728
        return scan_no_match{"no header found"};
1,861✔
1729
    }
1730

1731
    void annotate(logfile* lf,
1,464✔
1732
                  uint64_t line_number,
1733
                  string_attrs_t& sa,
1734
                  logline_value_vector& values) const override
1735
    {
1736
        auto& sbr = values.lvv_sbr;
1,464✔
1737
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,464✔
1738
        std::optional<line_range> date_lr;
1,464✔
1739
        std::optional<line_range> time_lr;
1,464✔
1740

1741
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
26,154✔
1742
            auto sf = *iter;
24,690✔
1743

1744
            if (iter.index() >= this->wlf_field_defs.size()) {
24,690✔
1745
                sa.emplace_back(line_range{sf.sf_begin, -1},
×
1746
                                SA_INVALID.value("extra fields detected"));
×
1747
                return;
×
1748
            }
1749

1750
            const auto& fd = this->wlf_field_defs[iter.index()];
24,690✔
1751

1752
            if (sf == "-") {
24,690✔
1753
                sf.invalidate();
4,300✔
1754
            }
1755

1756
            auto lr = line_range(sf.sf_begin, sf.sf_end);
24,690✔
1757

1758
            if (lr.is_valid()) {
24,690✔
1759
                if (fd.fd_meta.lvm_name == F_DATE) {
20,390✔
1760
                    date_lr = lr;
1,442✔
1761
                } else if (fd.fd_meta.lvm_name == F_TIME) {
18,948✔
1762
                    time_lr = lr;
1,456✔
1763
                }
1764
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
20,390✔
1765
                if (sf.startswith("\"")) {
20,390✔
1766
                    auto& meta = values.lvv_values.back().lv_meta;
28✔
1767

1768
                    if (meta.lvm_kind == value_kind_t::VALUE_TEXT) {
28✔
1769
                        meta.lvm_kind = value_kind_t::VALUE_W3C_QUOTED;
26✔
1770
                    } else {
1771
                        meta.lvm_kind = value_kind_t::VALUE_NULL;
2✔
1772
                    }
1773
                }
1774
            } else {
1775
                values.lvv_values.emplace_back(fd.fd_meta);
4,300✔
1776
            }
1777
            if (fd.fd_root_meta != nullptr) {
24,690✔
1778
                values.lvv_values.back().lv_meta.lvm_user_hidden
20,314✔
1779
                    = fd.fd_root_meta->lvm_user_hidden;
20,314✔
1780
            }
1781
        }
1782
        if (time_lr) {
1,464✔
1783
            auto ts_lr = time_lr.value();
1,456✔
1784
            if (date_lr) {
1,456✔
1785
                if (date_lr->lr_end + 1 == time_lr->lr_start) {
1,442✔
1786
                    ts_lr.lr_start = date_lr->lr_start;
1,442✔
1787
                    ts_lr.lr_end = time_lr->lr_end;
1,442✔
1788
                }
1789
            }
1790

1791
            sa.emplace_back(ts_lr, L_TIMESTAMP.value());
1,456✔
1792
        }
1793
        log_format::annotate(lf, line_number, sa, values);
1,464✔
1794
    }
1795

1796
    std::optional<size_t> stats_index_for_value(
×
1797
        const intern_string_t& name) const override
1798
    {
1799
        for (const auto& wlf_field_def : this->wlf_field_defs) {
×
1800
            if (wlf_field_def.fd_meta.lvm_name == name) {
×
1801
                if (!wlf_field_def.fd_numeric_index) {
×
1802
                    break;
×
1803
                }
1804
                return wlf_field_def.fd_numeric_index.value();
×
1805
            }
1806
        }
1807

1808
        return std::nullopt;
×
1809
    }
1810

1811
    bool hide_field(const intern_string_t field_name, bool val) override
×
1812
    {
1813
        if (field_name == LOG_TIME_STR) {
×
1814
            auto date_iter = FIELD_META.find(F_DATE);
×
1815
            auto time_iter = FIELD_META.find(F_TIME);
×
1816
            if (date_iter == FIELD_META.end() || time_iter == FIELD_META.end())
×
1817
            {
1818
                return false;
×
1819
            }
1820
            date_iter->second.lvm_user_hidden = val;
×
1821
            time_iter->second.lvm_user_hidden = val;
×
1822
            return true;
×
1823
        }
1824

1825
        auto fd_iter = FIELD_META.find(field_name);
×
1826
        if (fd_iter == FIELD_META.end()) {
×
1827
            return false;
×
1828
        }
1829

1830
        fd_iter->second.lvm_user_hidden = val;
×
1831

1832
        return true;
×
1833
    }
1834

1835
    std::map<intern_string_t, logline_value_meta> get_field_states() override
53✔
1836
    {
1837
        std::map<intern_string_t, logline_value_meta> retval;
53✔
1838

1839
        for (const auto& fd : FIELD_META) {
109✔
1840
            retval.emplace(fd.first, fd.second);
56✔
1841
        }
1842

1843
        return retval;
53✔
1844
    }
×
1845

1846
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
12✔
1847
    {
1848
        auto retval = std::make_shared<w3c_log_format>(*this);
12✔
1849

1850
        retval->lf_specialized = true;
12✔
1851
        return retval;
24✔
1852
    }
12✔
1853

1854
    class w3c_log_table : public log_format_vtab_impl {
1855
    public:
1856
        explicit w3c_log_table(std::shared_ptr<const log_format> format)
9✔
1857
            : log_format_vtab_impl(format)
9✔
1858
        {
1859
        }
9✔
1860

1861
        void get_columns(std::vector<vtab_column>& cols) const override
12✔
1862
        {
1863
            for (const auto& fd : get_known_fields()) {
204✔
1864
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
192✔
1865
                    fd.fd_meta.lvm_kind);
192✔
1866

1867
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
192✔
1868
                                  type_pair.first,
1869
                                  fd.fd_collator,
192✔
1870
                                  false,
384✔
1871
                                  "",
1872
                                  type_pair.second);
1873
            }
1874
            cols.emplace_back("x_fields");
12✔
1875
            cols.back().with_comment(
24✔
1876
                "A JSON-object that contains fields that are not first-class "
1877
                "columns");
1878
            for (const auto& fs : get_known_struct_fields()) {
60✔
1879
                cols.emplace_back(fs.fs_struct_name.to_string());
48✔
1880
            }
1881
        }
12✔
1882

1883
        void get_foreign_keys(
3✔
1884
            std::unordered_set<std::string>& keys_inout) const override
1885
        {
1886
            this->log_vtab_impl::get_foreign_keys(keys_inout);
3✔
1887

1888
            for (const auto& fd : get_known_fields()) {
51✔
1889
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
48✔
1890
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
30✔
1891
                }
1892
            }
1893
        }
3✔
1894
    };
1895

1896
    /**
     * @return The function-local map from format name to the virtual table
     *   created for it.
     */
    static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>&
    get_tables()
    {
        static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>
            table_registry;

        return table_registry;
    }
1903

1904
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
646✔
1905
    {
1906
        if (this->wlf_format_name.empty()) {
646✔
1907
            return nullptr;
637✔
1908
        }
1909

1910
        std::shared_ptr<w3c_log_table> retval = nullptr;
9✔
1911

1912
        auto& tables = get_tables();
9✔
1913
        const auto iter = tables.find(this->wlf_format_name);
9✔
1914
        if (iter == tables.end()) {
9✔
1915
            retval = std::make_shared<w3c_log_table>(this->shared_from_this());
9✔
1916
            tables[this->wlf_format_name] = retval;
9✔
1917
        }
1918

1919
        return retval;
9✔
1920
    }
9✔
1921

1922
    void get_subline(const log_format_file_state& lffs,
1,622✔
1923
                     const logline& ll,
1924
                     shared_buffer_ref& sbr,
1925
                     subline_options opts) override
1926
    {
1927
    }
1,622✔
1928

1929
    date_time_scanner wlf_time_scanner;
1930
    intern_string_t wlf_format_name;
1931
    std::vector<field_def> wlf_field_defs;
1932
};
1933

1934
std::unordered_map<const intern_string_t, logline_value_meta>
1935
    w3c_log_format::FIELD_META;
1936

1937
const intern_string_t w3c_log_format::F_DATE = intern_string::lookup("date");
1938
const intern_string_t w3c_log_format::F_TIME = intern_string::lookup("time");
1939

1940
struct logfmt_pair_handler {
1941
    explicit logfmt_pair_handler(date_time_scanner& dts) : lph_dt_scanner(dts)
12,371✔
1942
    {
1943
    }
12,371✔
1944

1945
    log_format::scan_result_t process_value(const string_fragment& value_frag)
3,856✔
1946
    {
1947
        if (this->lph_key_frag.is_one_of(
3,856✔
1948
                "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
1949
        {
1950
            if (!this->lph_dt_scanner.scan(value_frag.data(),
43✔
1951
                                           value_frag.length(),
43✔
1952
                                           nullptr,
1953
                                           &this->lph_time_tm,
1954
                                           this->lph_tv))
43✔
1955
            {
1956
                return log_format::scan_no_match{
12✔
1957
                    "timestamp value did not parse correctly"};
12✔
1958
            }
1959
            char buf[1024];
1960
            this->lph_dt_scanner.ftime(
31✔
1961
                buf, sizeof(buf), nullptr, this->lph_time_tm);
31✔
1962
            this->lph_found_time += 1;
31✔
1963
        } else if (this->lph_key_frag.is_one_of("level"_frag, "lvl"_frag)) {
3,813✔
1964
            this->lph_level
1965
                = string2level(value_frag.data(), value_frag.length());
40✔
1966
        }
1967
        return log_format::scan_match{};
3,844✔
1968
    }
1969

1970
    date_time_scanner& lph_dt_scanner;
1971
    size_t lph_found_time{0};
1972
    exttm lph_time_tm;
1973
    timeval lph_tv{0, 0};
1974
    log_level_t lph_level{log_level_t::LEVEL_INFO};
1975
    string_fragment lph_key_frag{""};
1976
};
1977

1978
class logfmt_format : public log_format {
1979
public:
1980
    const intern_string_t get_name() const override
15,098✔
1981
    {
1982
        const static intern_string_t NAME = intern_string::lookup("logfmt_log");
16,600✔
1983

1984
        return NAME;
15,098✔
1985
    }
1986

1987
    class logfmt_log_table : public log_format_vtab_impl {
1988
    public:
1989
        logfmt_log_table(std::shared_ptr<const log_format> format)
637✔
1990
            : log_format_vtab_impl(format)
637✔
1991
        {
1992
        }
637✔
1993

1994
        void get_columns(std::vector<vtab_column>& cols) const override
638✔
1995
        {
1996
            static const auto FIELDS = std::string("fields");
1,912✔
1997

1998
            cols.emplace_back(FIELDS);
638✔
1999
        }
638✔
2000
    };
2001

2002
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
637✔
2003
    {
2004
        static auto retval
2005
            = std::make_shared<logfmt_log_table>(this->shared_from_this());
637✔
2006

2007
        return retval;
637✔
2008
    }
2009

2010
    scan_result_t scan(logfile& lf,
12,371✔
2011
                       std::vector<logline>& dst,
2012
                       const line_info& li,
2013
                       shared_buffer_ref& sbr,
2014
                       scan_batch_context& sbc) override
2015
    {
2016
        auto p = logfmt::parser(sbr.to_string_fragment());
12,371✔
2017
        scan_result_t retval = scan_no_match{};
12,371✔
2018
        bool done = false;
12,371✔
2019
        logfmt_pair_handler lph(this->lf_date_time);
12,371✔
2020

2021
        if (dst.empty()) {
12,371✔
2022
            auto file_options = lf.get_file_options();
1,160✔
2023

2024
            if (file_options) {
1,160✔
2025
                this->lf_date_time.dts_default_zone
2026
                    = file_options->second.fo_default_zone.pp_value;
57✔
2027
            } else {
2028
                this->lf_date_time.dts_default_zone = nullptr;
1,103✔
2029
            }
2030
        }
1,160✔
2031

2032
        while (!done) {
44,033✔
2033
            auto parse_result = p.step();
31,662✔
2034

2035
            auto value_res = parse_result.match(
2036
                [&done](const logfmt::parser::end_of_input&) -> scan_result_t {
×
2037
                    done = true;
12,072✔
2038
                    return scan_match{};
12,072✔
2039
                },
NEW
2040
                [](const string_fragment&) -> scan_result_t {
×
2041
                    return scan_incomplete{};
15,447✔
2042
                },
UNCOV
2043
                [&lph](const logfmt::parser::kvpair& kvp) -> scan_result_t {
×
2044
                    lph.lph_key_frag = kvp.first;
3,856✔
2045

2046
                    return kvp.second.match(
2047
                        [](const logfmt::parser::bool_value& bv)
×
2048
                            -> scan_result_t { return scan_match{}; },
×
2049
                        [&lph](const logfmt::parser::float_value& fv)
×
2050
                            -> scan_result_t {
2051
                            return lph.process_value(fv.fv_str_value);
5✔
2052
                        },
2053
                        [&lph](const logfmt::parser::int_value& iv)
×
2054
                            -> scan_result_t {
2055
                            return lph.process_value(iv.iv_str_value);
112✔
2056
                        },
2057
                        [&lph](const logfmt::parser::quoted_value& qv)
×
2058
                            -> scan_result_t {
2059
                            auto_mem<yajl_handle_t> handle(yajl_free);
334✔
2060
                            yajl_callbacks cb;
2061
                            scan_result_t retval;
334✔
2062

2063
                            memset(&cb, 0, sizeof(cb));
334✔
2064
                            handle = yajl_alloc(&cb, nullptr, &lph);
334✔
2065
                            cb.yajl_string = +[](void* ctx,
668✔
2066
                                                 const unsigned char* str,
2067
                                                 size_t len,
2068
                                                 yajl_string_props_t*) -> int {
2069
                                auto& lph = *((logfmt_pair_handler*) ctx);
334✔
2070
                                string_fragment value_frag{str, 0, (int) len};
334✔
2071

2072
                                auto value_res = lph.process_value(value_frag);
334✔
2073
                                return value_res.is<scan_match>();
668✔
2074
                            };
668✔
2075

2076
                            if (yajl_parse(
334✔
2077
                                    handle,
2078
                                    (const unsigned char*) qv.qv_value.data(),
334✔
2079
                                    qv.qv_value.length())
334✔
2080
                                    != yajl_status_ok
2081
                                || yajl_complete_parse(handle)
334✔
2082
                                    != yajl_status_ok)
2083
                            {
2084
                                log_debug("json parsing failed");
×
2085
                                string_fragment unq_frag{
2086
                                    qv.qv_value.sf_string,
×
2087
                                    qv.qv_value.sf_begin + 1,
×
2088
                                    qv.qv_value.sf_end - 1,
×
2089
                                };
2090

2091
                                return lph.process_value(unq_frag);
×
2092
                            }
2093

2094
                            return scan_match{};
334✔
2095
                        },
334✔
2096
                        [&lph](const logfmt::parser::unquoted_value& uv)
3,856✔
2097
                            -> scan_result_t {
2098
                            return lph.process_value(uv.uv_value);
3,405✔
2099
                        });
7,712✔
2100
                },
2101
                [](const logfmt::parser::error& err) -> scan_result_t {
×
2102
                    // log_error("logfmt parse error: %s", err.e_msg.c_str());
2103
                    return scan_no_match{};
287✔
2104
                });
31,662✔
2105
            if (value_res.is<scan_no_match>()) {
31,662✔
2106
                retval = value_res;
299✔
2107
                done = true;
299✔
2108
            }
2109
        }
31,662✔
2110

2111
        if (lph.lph_found_time == 1) {
12,371✔
2112
            this->lf_timestamp_flags = lph.lph_time_tm.et_flags;
31✔
2113
            dst.emplace_back(
31✔
2114
                li.li_file_range.fr_offset, to_us(lph.lph_tv), lph.lph_level);
31✔
2115
            retval = scan_match{500};
31✔
2116
        }
2117

2118
        return retval;
24,742✔
2119
    }
×
2120

2121
    void annotate(logfile* lf,
11✔
2122
                  uint64_t line_number,
2123
                  string_attrs_t& sa,
2124
                  logline_value_vector& values) const override
2125
    {
2126
        static const intern_string_t FIELDS_NAME
2127
            = intern_string::lookup("fields");
15✔
2128

2129
        auto& sbr = values.lvv_sbr;
11✔
2130
        auto p = logfmt::parser(sbr.to_string_fragment());
11✔
2131
        auto done = false;
11✔
2132
        size_t found_body = 0;
11✔
2133

2134
        while (!done) {
95✔
2135
            auto parse_result = p.step();
84✔
2136

2137
            done = parse_result.match(
168✔
2138
                [](const logfmt::parser::end_of_input&) { return true; },
11✔
NEW
2139
                [](const string_fragment&) { return false; },
×
UNCOV
2140
                [this, &sa, &values, &found_body](
×
2141
                    const logfmt::parser::kvpair& kvp) {
2142
                    auto value_frag = kvp.second.match(
73✔
2143
                        [this, &kvp, &values](
×
2144
                            const logfmt::parser::bool_value& bv) {
2145
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2146
                                                              kvp.first),
×
2147
                                                          value_kind_t::
2148
                                                              VALUE_INTEGER,
2149
                                                          logline_value_meta::
2150
                                                              table_column{0},
×
2151
                                                          (log_format*) this}
×
2152
                                           .with_struct_name(FIELDS_NAME);
×
2153
                            values.lvv_values.emplace_back(lvm, bv.bv_value);
×
2154

2155
                            return bv.bv_str_value;
×
2156
                        },
×
2157
                        [this, &kvp, &values](
×
2158
                            const logfmt::parser::int_value& iv) {
2159
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2160
                                                              kvp.first),
×
2161
                                                          value_kind_t::
2162
                                                              VALUE_INTEGER,
2163
                                                          logline_value_meta::
2164
                                                              table_column{0},
×
2165
                                                          (log_format*) this}
×
2166
                                           .with_struct_name(FIELDS_NAME);
×
2167
                            values.lvv_values.emplace_back(lvm, iv.iv_value);
×
2168

2169
                            return iv.iv_str_value;
×
2170
                        },
×
2171
                        [this, &kvp, &values](
73✔
2172
                            const logfmt::parser::float_value& fv) {
2173
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2174
                                                              kvp.first),
×
2175
                                                          value_kind_t::
2176
                                                              VALUE_INTEGER,
2177
                                                          logline_value_meta::
2178
                                                              table_column{0},
×
2179
                                                          (log_format*) this}
×
2180
                                           .with_struct_name(FIELDS_NAME);
×
2181
                            values.lvv_values.emplace_back(lvm, fv.fv_value);
×
2182

2183
                            return fv.fv_str_value;
×
2184
                        },
×
2185
                        [](const logfmt::parser::quoted_value& qv) {
×
2186
                            return qv.qv_value;
24✔
2187
                        },
2188
                        [](const logfmt::parser::unquoted_value& uv) {
×
2189
                            return uv.uv_value;
49✔
2190
                        });
2191
                    auto value_lr
2192
                        = line_range{value_frag.sf_begin, value_frag.sf_end};
73✔
2193

2194
                    auto known_field = false;
73✔
2195
                    if (kvp.first.is_one_of(
73✔
2196
                            "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
2197
                    {
2198
                        sa.emplace_back(value_lr, L_TIMESTAMP.value());
11✔
2199
                        known_field = true;
11✔
2200
                    } else if (kvp.first.is_one_of("level"_frag, "lvl"_frag)) {
62✔
2201
                        sa.emplace_back(value_lr, L_LEVEL.value());
11✔
2202
                        known_field = true;
11✔
2203
                    } else if (kvp.first.is_one_of("msg"_frag,
51✔
2204
                                                   "message"_frag)) {
2205
                        sa.emplace_back(value_lr, SA_BODY.value());
11✔
2206
                        found_body += 1;
11✔
2207
                    } else if (kvp.second.is<logfmt::parser::quoted_value>()
40✔
2208
                               || kvp.second
78✔
2209
                                      .is<logfmt::parser::unquoted_value>())
38✔
2210
                    {
2211
                        auto lvm
2212
                            = logline_value_meta{intern_string::lookup(
160✔
2213
                                                     kvp.first),
40✔
2214
                                                 value_frag.startswith("\"")
40✔
2215
                                                     ? value_kind_t::VALUE_JSON
2216
                                                     : value_kind_t::VALUE_TEXT,
2217
                                                 logline_value_meta::
2218
                                                     table_column{0},
40✔
2219
                                                 (log_format*) this}
80✔
2220
                                  .with_struct_name(FIELDS_NAME);
40✔
2221
                        values.lvv_values.emplace_back(lvm, value_frag);
40✔
2222
                    }
40✔
2223
                    if (known_field) {
73✔
2224
                        auto key_with_eq = kvp.first;
22✔
2225
                        key_with_eq.sf_end += 1;
22✔
2226
                        sa.emplace_back(to_line_range(key_with_eq),
22✔
2227
                                        SA_REPLACED.value());
44✔
2228
                    } else {
2229
                        sa.emplace_back(to_line_range(kvp.first),
51✔
2230
                                        VC_ROLE.value(role_t::VCR_OBJECT_KEY));
102✔
2231
                    }
2232
                    return false;
73✔
2233
                },
2234
                [line_number, &sbr](const logfmt::parser::error& err) {
84✔
2235
                    log_error(
×
2236
                        "bad line %.*s", (int) sbr.length(), sbr.get_data());
2237
                    log_error("%lld:logfmt parse error: %s",
×
2238
                              line_number,
2239
                              err.e_msg.c_str());
2240
                    return true;
×
2241
                });
2242
        }
84✔
2243

2244
        if (found_body == 1) {
11✔
2245
            sa.emplace_back(line_range::empty_at(sbr.length()),
11✔
2246
                            SA_BODY.value());
22✔
2247
        }
2248

2249
        log_format::annotate(lf, line_number, sa, values);
11✔
2250
    }
11✔
2251

2252
    std::shared_ptr<log_format> specialized(int fmt_lock) override
5✔
2253
    {
2254
        auto retval = std::make_shared<logfmt_format>(*this);
5✔
2255

2256
        retval->lf_specialized = true;
5✔
2257
        return retval;
10✔
2258
    }
5✔
2259
};
2260

2261
static auto format_binder = injector::bind_multiple<log_format>()
2262
                                .add<logfmt_format>()
2263
                                .add<bro_log_format>()
2264
                                .add<w3c_log_format>()
2265
                                .add<generic_log_format>()
2266
                                .add<piper_log_format>();
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc