• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 11872214087-1756

16 Nov 2024 06:12PM UTC coverage: 70.243% (+0.5%) from 69.712%
11872214087-1756

push

github

tstack
[build] disable regex101

46266 of 65866 relevant lines covered (70.24%)

467515.63 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.09
/src/log_format_impls.cc
1
/**
2
 * Copyright (c) 2007-2017, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file log_format_impls.cc
30
 */
31

32
#include <algorithm>
33
#include <utility>
34

35
#include "log_format.hh"
36

37
#include <stdio.h>
38

39
#include "base/injector.bind.hh"
40
#include "base/opt_util.hh"
41
#include "config.h"
42
#include "formats/logfmt/logfmt.parser.hh"
43
#include "log_vtab_impl.hh"
44
#include "ptimec.hh"
45
#include "scn/scn.h"
46
#include "sql_util.hh"
47
#include "yajlpp/yajlpp.hh"
48

49
class piper_log_format : public log_format {
50
public:
51
    const intern_string_t get_name() const override
12,525✔
52
    {
53
        static const intern_string_t RETVAL
54
            = intern_string::lookup("lnav_piper_log");
12,525✔
55

56
        return RETVAL;
12,525✔
57
    }
58

59
    scan_result_t scan(logfile& lf,
10,177✔
60
                       std::vector<logline>& dst,
61
                       const line_info& li,
62
                       shared_buffer_ref& sbr,
63
                       scan_batch_context& sbc) override
64
    {
65
        if (lf.has_line_metadata()
10,177✔
66
            && lf.get_text_format() == text_format_t::TF_LOG)
10,177✔
67
        {
68
            dst.emplace_back(
125✔
69
                li.li_file_range.fr_offset, li.li_timestamp, li.li_level);
125✔
70
            return scan_match{100};
125✔
71
        }
72

73
        return scan_no_match{""};
10,052✔
74
    }
75

76
    void annotate(logfile* lf,
41✔
77
                  uint64_t line_number,
78
                  string_attrs_t& sa,
79
                  logline_value_vector& values,
80
                  bool annotate_module) const override
81
    {
82
        auto lr = line_range{0, 0};
41✔
83
        sa.emplace_back(lr, logline::L_TIMESTAMP.value());
41✔
84
        log_format::annotate(lf, line_number, sa, values, annotate_module);
41✔
85
    }
41✔
86

87
    void get_subline(const logline& ll,
317✔
88
                     shared_buffer_ref& sbr,
89
                     bool full_message) override
90
    {
91
        this->plf_cached_line.resize(32);
317✔
92
        auto tlen = sql_strftime(this->plf_cached_line.data(),
317✔
93
                                 this->plf_cached_line.size(),
94
                                 ll.get_timeval(),
317✔
95
                                 'T');
96
        this->plf_cached_line.resize(tlen);
317✔
97
        {
98
            char zone_str[16];
99
            exttm tmptm;
317✔
100

101
            tmptm.et_flags |= ETF_ZONE_SET;
317✔
102
            tmptm.et_gmtoff
103
                = lnav::local_time_to_info(
634✔
104
                      date::local_seconds{std::chrono::seconds{ll.get_time()}})
317✔
105
                      .first.offset.count();
317✔
106
            off_t zone_len = 0;
317✔
107
            ftime_z(zone_str, zone_len, sizeof(zone_str), tmptm);
317✔
108
            for (off_t lpc = 0; lpc < zone_len; lpc++) {
1,902✔
109
                this->plf_cached_line.push_back(zone_str[lpc]);
1,585✔
110
            }
111
        }
112
        this->plf_cached_line.push_back(' ');
317✔
113
        const auto prefix_len = this->plf_cached_line.size();
317✔
114
        this->plf_cached_line.resize(this->plf_cached_line.size()
634✔
115
                                     + sbr.length());
317✔
116
        memcpy(
317✔
117
            &this->plf_cached_line[prefix_len], sbr.get_data(), sbr.length());
317✔
118

119
        sbr.share(this->plf_share_manager,
634✔
120
                  this->plf_cached_line.data(),
317✔
121
                  this->plf_cached_line.size());
122
    }
317✔
123

124
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
125
    {
126
        auto retval = std::make_shared<piper_log_format>(*this);
6✔
127

128
        retval->lf_specialized = true;
6✔
129
        retval->lf_timestamp_flags |= ETF_ZONE_SET;
6✔
130
        return retval;
12✔
131
    }
6✔
132

133
private:
134
    shared_buffer plf_share_manager;
135
    std::vector<char> plf_cached_line;
136
};
137

138
class generic_log_format : public log_format {
139
public:
140
    static const pcre_format* get_pcre_log_formats()
10,218✔
141
    {
142
        static const pcre_format log_fmt[] = {
143
            pcre_format(
144
                "^(?:\\*\\*\\*\\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24})(.*)"),
145
            pcre_format(
146
                R"(^(?:\*\*\*\s+)?(?<timestamp>(?:\s|\d{4}[\-\/]\d{2}[\-\/]\d{2}|T|\d{1,2}:\d{2}(?::\d{2}(?:[\.,]\d{1,6})?)?|Z|[+\-]\d{2}:?\d{2}|(?!DBG|ERR|INFO|WARN|NONE)[A-Z]{3,4})+)(?:\s+|[:|])([^:]+))"),
147
            pcre_format(
148
                "^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:+/\\.-]+) \\[\\w (.*)"),
149
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:,/\\.-]+) (.*)"),
150
            pcre_format(
151
                "^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:,/\\.-]+) - (.*)"),
152
            pcre_format(
153
                "^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: \\.,/-]+) - (.*)"),
154
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: "
155
                        "\\.,/-]+)\\[[^\\]]+\\](.*)"),
156
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: \\.,/-]+) (.*)"),
157

158
            pcre_format(
159
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w: \.,+/-]+)\]\s*(\w+):?)"),
160
            pcre_format(
161
                "^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: \\.,+/-]+)\\] (.*)"),
162
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: "
163
                        "\\.,+/-]+)\\] \\[(\\w+)\\]"),
164
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: "
165
                        "\\.,+/-]+)\\] \\w+ (.*)"),
166
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: ,+/-]+)\\] "
167
                        "\\(\\d+\\) (.*)"),
168

169
            pcre_format(),
170
        };
10,218✔
171

172
        return log_fmt;
10,218✔
173
    }
174

175
    std::string get_pattern_regex(uint64_t line_number) const override
×
176
    {
177
        int pat_index = this->pattern_index_for_line(line_number);
×
178
        return get_pcre_log_formats()[pat_index].name;
×
179
    }
180

181
    const intern_string_t get_name() const override
13,050✔
182
    {
183
        static const intern_string_t RETVAL
184
            = intern_string::lookup("generic_log");
13,050✔
185

186
        return RETVAL;
13,050✔
187
    }
188

189
    scan_result_t scan(logfile& lf,
10,140✔
190
                       std::vector<logline>& dst,
191
                       const line_info& li,
192
                       shared_buffer_ref& sbr,
193
                       scan_batch_context& sbc) override
194
    {
195
        struct exttm log_time;
10,140✔
196
        struct timeval log_tv;
197
        string_fragment ts;
10,140✔
198
        std::optional<string_fragment> level;
10,140✔
199
        const char* last_pos;
200

201
        if (dst.empty()) {
10,140✔
202
            auto file_options = lf.get_file_options();
183✔
203

204
            if (file_options) {
183✔
205
                this->lf_date_time.dts_default_zone
206
                    = file_options->second.fo_default_zone.pp_value;
2✔
207
            } else {
208
                this->lf_date_time.dts_default_zone = nullptr;
181✔
209
            }
210
        }
183✔
211

212
        if ((last_pos = this->log_scanf(dst.size(),
10,140✔
213
                                        sbr.to_string_fragment(),
214
                                        get_pcre_log_formats(),
215
                                        nullptr,
216
                                        &log_time,
217
                                        &log_tv,
218

219
                                        &ts,
220
                                        &level))
221
            != nullptr)
10,140✔
222
        {
223
            log_level_t level_val = log_level_t::LEVEL_UNKNOWN;
1,064✔
224
            if (level) {
1,064✔
225
                level_val = string2level(level->data(), level->length());
1,064✔
226
            }
227

228
            if (!((log_time.et_flags & ETF_DAY_SET)
1,064✔
229
                  && (log_time.et_flags & ETF_MONTH_SET)
990✔
230
                  && (log_time.et_flags & ETF_YEAR_SET)))
990✔
231
            {
232
                this->check_for_new_year(dst, log_time, log_tv);
638✔
233
            }
234

235
            if (!(this->lf_timestamp_flags
2,128✔
236
                  & (ETF_MILLIS_SET | ETF_MICROS_SET | ETF_NANOS_SET))
1,064✔
237
                && !dst.empty() && dst.back().get_time() == log_tv.tv_sec
829✔
238
                && dst.back().get_millis() != 0)
1,893✔
239
            {
240
                auto log_ms
241
                    = std::chrono::milliseconds(dst.back().get_millis());
×
242

243
                log_time.et_nsec
244
                    = std::chrono::duration_cast<std::chrono::nanoseconds>(
×
245
                          log_ms)
246
                          .count();
×
247
                log_tv.tv_usec
248
                    = std::chrono::duration_cast<std::chrono::microseconds>(
×
249
                          log_ms)
250
                          .count();
×
251
            }
252

253
            dst.emplace_back(li.li_file_range.fr_offset, log_tv, level_val);
1,064✔
254
            return scan_match{0};
1,064✔
255
        }
256

257
        return scan_no_match{"no patterns matched"};
9,076✔
258
    }
259

260
    void annotate(logfile* lf,
78✔
261
                  uint64_t line_number,
262
                  string_attrs_t& sa,
263
                  logline_value_vector& values,
264
                  bool annotate_module) const override
265
    {
266
        auto& line = values.lvv_sbr;
78✔
267
        int pat_index = this->pattern_index_for_line(line_number);
78✔
268
        const auto& fmt = get_pcre_log_formats()[pat_index];
78✔
269
        int prefix_len = 0;
78✔
270
        auto md = fmt.pcre->create_match_data();
78✔
271
        auto match_res = fmt.pcre->capture_from(line.to_string_fragment())
78✔
272
                             .into(md)
78✔
273
                             .matches(PCRE2_NO_UTF_CHECK)
156✔
274
                             .ignore_error();
78✔
275
        if (!match_res) {
78✔
276
            return;
4✔
277
        }
278

279
        auto ts_cap = md[fmt.pf_timestamp_index].value();
74✔
280
        auto lr = to_line_range(ts_cap.trim());
74✔
281
        sa.emplace_back(lr, logline::L_TIMESTAMP.value());
74✔
282

283
        values.lvv_values.emplace_back(TS_META, line, lr);
74✔
284
        values.lvv_values.back().lv_meta.lvm_format = (log_format*) this;
74✔
285

286
        prefix_len = ts_cap.sf_end;
74✔
287
        auto level_cap = md[2];
74✔
288
        if (level_cap) {
74✔
289
            if (string2level(level_cap->data(), level_cap->length(), true)
74✔
290
                != LEVEL_UNKNOWN)
74✔
291
            {
292
                prefix_len = level_cap->sf_end;
63✔
293

294
                values.lvv_values.emplace_back(
63✔
295
                    LEVEL_META, line, to_line_range(level_cap->trim()));
63✔
296
                values.lvv_values.back().lv_meta.lvm_format
63✔
297
                    = (log_format*) this;
126✔
298
            }
299
        }
300

301
        lr.lr_start = 0;
74✔
302
        lr.lr_end = prefix_len;
74✔
303
        sa.emplace_back(lr, logline::L_PREFIX.value());
74✔
304

305
        lr.lr_start = prefix_len;
74✔
306
        lr.lr_end = line.length();
74✔
307
        sa.emplace_back(lr, SA_BODY.value());
74✔
308

309
        log_format::annotate(lf, line_number, sa, values, annotate_module);
74✔
310
    }
78✔
311

312
    std::shared_ptr<log_format> specialized(int fmt_lock) override
47✔
313
    {
314
        auto retval = std::make_shared<generic_log_format>(*this);
47✔
315

316
        retval->lf_specialized = true;
47✔
317
        return retval;
94✔
318
    }
47✔
319

320
    bool hide_field(const intern_string_t field_name, bool val) override
2✔
321
    {
322
        if (field_name == TS_META.lvm_name) {
2✔
323
            TS_META.lvm_user_hidden = val;
1✔
324
            return true;
1✔
325
        } else if (field_name == LEVEL_META.lvm_name) {
1✔
326
            LEVEL_META.lvm_user_hidden = val;
1✔
327
            return true;
1✔
328
        }
329
        return false;
×
330
    }
331

332
    std::map<intern_string_t, logline_value_meta> get_field_states() override
17✔
333
    {
334
        return {
335
            {TS_META.lvm_name, TS_META},
336
            {LEVEL_META.lvm_name, LEVEL_META},
337
        };
51✔
338
    }
339

340
private:
341
    static logline_value_meta TS_META;
342
    static logline_value_meta LEVEL_META;
343
};
344

345
logline_value_meta generic_log_format::TS_META{
346
    intern_string::lookup("log_time"),
347
    value_kind_t::VALUE_TEXT,
348
    logline_value_meta::table_column{2},
349
};
350
logline_value_meta generic_log_format::LEVEL_META{
351
    intern_string::lookup("log_level"),
352
    value_kind_t::VALUE_TEXT,
353
    logline_value_meta::table_column{4},
354
};
355

356
/**
 * Decode "\xNN" escapes in a length-delimited buffer.
 *
 * @param str The bytes to decode; does not need to be NUL-terminated.
 * @param len The number of bytes available in `str`.
 * @return The decoded string.  A backslash followed by 'x' and exactly two
 * hexadecimal digits is replaced by the byte with that value.  Any other
 * backslash is dropped and the characters after it are copied as-is.
 */
std::string
from_escaped_string(const char* str, size_t len)
{
    // Value of a single hexadecimal digit, or -1 if `ch` is not one.  The
    // digits are parsed by hand so that nothing reads past `len` (sscanf()
    // needs a NUL-terminated string) and so that signs, whitespace, or a
    // lone digit are not accepted as an escape.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    std::string retval;

    retval.reserve(len);
    for (size_t lpc = 0; lpc < len; lpc++) {
        if (str[lpc] != '\\') {
            retval.push_back(str[lpc]);
            continue;
        }

        // A full escape needs four bytes: '\\', 'x' and two digits.
        if ((lpc + 3) < len && str[lpc + 1] == 'x') {
            const int hi = hex_value(str[lpc + 2]);
            const int lo = hex_value(str[lpc + 3]);

            if (hi >= 0 && lo >= 0) {
                retval.push_back(static_cast<char>((hi << 4) | lo));
                // Skip the 'x' and both digits; the loop skips the '\\'.
                lpc += 3;
            }
        }
    }

    return retval;
}
×
381

382
/**
 * Find the first occurrence of the NUL-terminated string `find` within the
 * first `slen` bytes of `s`.  The search also stops at a NUL byte in `s`.
 *
 * @return A pointer to the start of the match, `s` itself when `find` is
 * empty, or std::nullopt when there is no match.
 */
std::optional<const char*>
lnav_strnstr(const char* s, const char* find, size_t slen)
{
    const char first = find[0];

    if (first == '\0') {
        // An empty needle matches at the start of the haystack.
        return s;
    }

    const char* rest = find + 1;
    const size_t rest_len = strlen(rest);
    size_t remaining = slen;

    while (remaining > 0 && *s != '\0') {
        const char curr = *s;

        ++s;
        --remaining;
        if (curr != first) {
            continue;
        }
        if (rest_len > remaining) {
            // Not enough bytes left for the remainder of the needle.
            return std::nullopt;
        }
        if (strncmp(s, rest, rest_len) == 0) {
            // `s` is one past the first matching character.
            return s - 1;
        }
    }

    return std::nullopt;
}
406

407
struct separated_string {
408
    const char* ss_str;
409
    size_t ss_len;
410
    const char* ss_separator;
411
    size_t ss_separator_len;
412

413
    separated_string(const char* str, size_t len)
23,097✔
414
        : ss_str(str), ss_len(len), ss_separator(","),
23,097✔
415
          ss_separator_len(strlen(this->ss_separator))
23,097✔
416
    {
417
    }
23,097✔
418

419
    separated_string& with_separator(const char* sep)
23,097✔
420
    {
421
        this->ss_separator = sep;
23,097✔
422
        this->ss_separator_len = strlen(sep);
23,097✔
423
        return *this;
23,097✔
424
    }
425

426
    struct iterator {
427
        const separated_string& i_parent;
428
        const char* i_pos;
429
        const char* i_next_pos;
430
        size_t i_index;
431

432
        iterator(const separated_string& ss, const char* pos)
565,833✔
433
            : i_parent(ss), i_pos(pos), i_next_pos(pos), i_index(0)
565,833✔
434
        {
435
            this->update();
565,833✔
436
        }
565,833✔
437

438
        void update()
1,085,619✔
439
        {
440
            const separated_string& ss = this->i_parent;
1,085,619✔
441
            auto next_field
442
                = lnav_strnstr(this->i_pos,
1,085,619✔
443
                               ss.ss_separator,
1,085,619✔
444
                               ss.ss_len - (this->i_pos - ss.ss_str));
1,085,619✔
445
            if (next_field) {
1,085,619✔
446
                this->i_next_pos = next_field.value() + ss.ss_separator_len;
497,039✔
447
            } else {
448
                this->i_next_pos = ss.ss_str + ss.ss_len;
588,580✔
449
            }
450
        }
1,085,619✔
451

452
        iterator& operator++()
519,786✔
453
        {
454
            this->i_pos = this->i_next_pos;
519,786✔
455
            this->update();
519,786✔
456
            this->i_index += 1;
519,786✔
457

458
            return *this;
519,786✔
459
        }
460

461
        string_fragment operator*()
446,739✔
462
        {
463
            const auto& ss = this->i_parent;
446,739✔
464
            int end;
465

466
            if (this->i_next_pos < (ss.ss_str + ss.ss_len)) {
446,739✔
467
                end = this->i_next_pos - ss.ss_str - ss.ss_separator_len;
427,835✔
468
            } else {
469
                end = this->i_next_pos - ss.ss_str;
18,904✔
470
            }
471
            return string_fragment::from_byte_range(
446,739✔
472
                ss.ss_str, this->i_pos - ss.ss_str, end);
446,739✔
473
        }
474

475
        bool operator==(const iterator& other) const
542,736✔
476
        {
477
            return (&this->i_parent == &other.i_parent)
542,736✔
478
                && (this->i_pos == other.i_pos);
542,736✔
479
        }
480

481
        bool operator!=(const iterator& other) const
542,589✔
482
        {
483
            return !(*this == other);
542,589✔
484
        }
485

486
        size_t index() const { return this->i_index; }
1,150,727✔
487
    };
488

489
    iterator begin() { return {*this, this->ss_str}; }
23,097✔
490

491
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
542,736✔
492
};
493

494
class bro_log_format : public log_format {
495
public:
496
    struct field_def {
497
        logline_value_meta fd_meta;
498
        logline_value_meta* fd_root_meta;
499
        std::string fd_collator;
500
        std::optional<size_t> fd_numeric_index;
501

502
        explicit field_def(const intern_string_t name,
593✔
503
                           size_t col,
504
                           log_format* format)
505
            : fd_meta(name,
1,186✔
506
                      value_kind_t::VALUE_TEXT,
507
                      logline_value_meta::table_column{col},
593✔
508
                      format),
509
              fd_root_meta(&FIELD_META.find(name)->second)
593✔
510
        {
511
        }
593✔
512

513
        field_def& with_kind(value_kind_t kind,
437✔
514
                             bool identifier = false,
515
                             bool foreign_key = false,
516
                             const std::string& collator = "")
517
        {
518
            this->fd_meta.lvm_kind = kind;
437✔
519
            this->fd_meta.lvm_identifier = identifier;
437✔
520
            this->fd_meta.lvm_foreign_key = foreign_key;
437✔
521
            this->fd_collator = collator;
437✔
522
            return *this;
437✔
523
        }
524

525
        field_def& with_numeric_index(size_t index)
111✔
526
        {
527
            this->fd_numeric_index = index;
111✔
528
            return *this;
111✔
529
        }
530
    };
531

532
    static std::unordered_map<const intern_string_t, logline_value_meta>
533
        FIELD_META;
534

535
    static const intern_string_t get_opid_desc()
2,414✔
536
    {
537
        static const intern_string_t RETVAL = intern_string::lookup("std");
2,414✔
538

539
        return RETVAL;
2,414✔
540
    }
541

542
    bro_log_format()
632✔
543
    {
632✔
544
        this->lf_structured = true;
632✔
545
        this->lf_is_self_describing = true;
632✔
546
        this->lf_time_ordered = false;
632✔
547

548
        auto desc_v = std::make_shared<std::vector<opid_descriptor>>();
632✔
549
        desc_v->emplace({});
632✔
550
        this->lf_opid_description_def->emplace(get_opid_desc(),
1,264✔
551
                                               opid_descriptors{desc_v});
1,264✔
552
    }
632✔
553

554
    const intern_string_t get_name() const override
128,971✔
555
    {
556
        static const intern_string_t name(intern_string::lookup("bro"));
128,971✔
557

558
        return this->blf_format_name.empty() ? name : this->blf_format_name;
128,971✔
559
    }
560

561
    void clear() override
10,198✔
562
    {
563
        this->log_format::clear();
10,198✔
564
        this->blf_format_name.clear();
10,198✔
565
        this->blf_field_defs.clear();
10,198✔
566
    }
10,198✔
567

568
    scan_result_t scan_int(std::vector<logline>& dst,
3,969✔
569
                           const line_info& li,
570
                           shared_buffer_ref& sbr,
571
                           scan_batch_context& sbc)
572
    {
573
        static const intern_string_t STATUS_CODE
574
            = intern_string::lookup("bro_status_code");
3,969✔
575
        static const intern_string_t TS = intern_string::lookup("bro_ts");
3,969✔
576
        static const intern_string_t UID = intern_string::lookup("bro_uid");
3,969✔
577
        static const intern_string_t ID_ORIG_H
578
            = intern_string::lookup("bro_id_orig_h");
3,969✔
579

580
        separated_string ss(sbr.get_data(), sbr.length());
3,969✔
581
        struct timeval tv;
582
        struct exttm tm;
3,969✔
583
        bool found_ts = false;
3,969✔
584
        log_level_t level = LEVEL_INFO;
3,969✔
585
        uint8_t opid = 0;
3,969✔
586
        auto opid_cap = string_fragment::invalid();
3,969✔
587
        auto host_cap = string_fragment::invalid();
3,969✔
588

589
        ss.with_separator(this->blf_separator.get());
3,969✔
590

591
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
116,973✔
592
            if (iter.index() == 0 && *iter == "#close") {
113,025✔
593
                return scan_match{2000};
21✔
594
            }
595

596
            if (iter.index() >= this->blf_field_defs.size()) {
113,004✔
597
                break;
×
598
            }
599

600
            const auto& fd = this->blf_field_defs[iter.index()];
113,004✔
601

602
            if (TS == fd.fd_meta.lvm_name) {
113,004✔
603
                string_fragment sf = *iter;
3,948✔
604

605
                if (this->lf_date_time.scan(
3,948✔
606
                        sf.data(), sf.length(), nullptr, &tm, tv))
3,948✔
607
                {
608
                    this->lf_timestamp_flags = tm.et_flags;
3,948✔
609
                    found_ts = true;
3,948✔
610
                }
611
            } else if (STATUS_CODE == fd.fd_meta.lvm_name) {
109,056✔
612
                const auto sf = *iter;
3,762✔
613

614
                if (!sf.empty() && sf[0] >= '4') {
3,762✔
615
                    level = LEVEL_ERROR;
19✔
616
                }
617
            } else if (UID == fd.fd_meta.lvm_name) {
105,294✔
618
                opid_cap = *iter;
3,948✔
619

620
                opid = hash_str(opid_cap.data(), opid_cap.length());
3,948✔
621
            } else if (ID_ORIG_H == fd.fd_meta.lvm_name) {
101,346✔
622
                host_cap = *iter;
3,948✔
623
            }
624

625
            if (fd.fd_numeric_index) {
113,004✔
626
                switch (fd.fd_meta.lvm_kind) {
20,298✔
627
                    case value_kind_t::VALUE_INTEGER:
20,298✔
628
                    case value_kind_t::VALUE_FLOAT: {
629
                        const auto sv = (*iter).to_string_view();
20,298✔
630
                        auto scan_float_res = scn::scan_value<double>(sv);
20,298✔
631
                        if (scan_float_res) {
20,298✔
632
                            this->lf_value_stats[fd.fd_numeric_index.value()]
16,536✔
633
                                .add_value(scan_float_res.value());
16,536✔
634
                        }
635
                        break;
20,298✔
636
                    }
637
                    default:
×
638
                        break;
×
639
                }
640
            }
641
        }
642

643
        if (found_ts) {
3,948✔
644
            if (!this->lf_specialized) {
3,948✔
645
                for (auto& ll : dst) {
189✔
646
                    ll.set_ignore(true);
168✔
647
                }
648
            }
649

650
            if (opid_cap.is_valid()) {
3,948✔
651
                auto opid_iter
652
                    = sbc.sbc_opids.insert_op(sbc.sbc_allocator, opid_cap, tv);
3,948✔
653
                opid_iter->second.otr_level_stats.update_msg_count(level);
3,948✔
654

655
                auto& otr = opid_iter->second;
3,948✔
656
                if (!otr.otr_description.lod_id && host_cap.is_valid()
5,730✔
657
                    && otr.otr_description.lod_elements.empty())
5,730✔
658
                {
659
                    otr.otr_description.lod_id = get_opid_desc();
1,782✔
660
                    otr.otr_description.lod_elements.emplace_back(
3,564✔
661
                        0, host_cap.to_string());
1,782✔
662
                }
663
            }
664
            dst.emplace_back(li.li_file_range.fr_offset, tv, level, 0, opid);
3,948✔
665
            return scan_match{2000};
3,948✔
666
        }
667
        return scan_no_match{};
×
668
    }
669

670
    scan_result_t scan(logfile& lf,
10,177✔
671
                       std::vector<logline>& dst,
672
                       const line_info& li,
673
                       shared_buffer_ref& sbr,
674
                       scan_batch_context& sbc) override
675
    {
676
        static const auto SEP_RE
677
            = lnav::pcre2pp::code::from_const(R"(^#separator\s+(.+))");
10,177✔
678

679
        if (dst.empty()) {
10,177✔
680
            auto file_options = lf.get_file_options();
967✔
681

682
            if (file_options) {
967✔
683
                this->lf_date_time.dts_default_zone
684
                    = file_options->second.fo_default_zone.pp_value;
35✔
685
            } else {
686
                this->lf_date_time.dts_default_zone = nullptr;
932✔
687
            }
688
        }
967✔
689

690
        if (!this->blf_format_name.empty()) {
10,177✔
691
            return this->scan_int(dst, li, sbr, sbc);
3,948✔
692
        }
693

694
        if (dst.empty() || dst.size() > 20 || sbr.empty()
11,491✔
695
            || sbr.get_data()[0] == '#')
11,491✔
696
        {
697
            return scan_no_match{};
4,100✔
698
        }
699

700
        auto line_iter = dst.begin();
2,129✔
701
        auto read_result = lf.read_line(line_iter);
2,129✔
702

703
        if (read_result.isErr()) {
2,129✔
704
            return scan_no_match{"unable to read first line"};
×
705
        }
706

707
        auto line = read_result.unwrap();
2,129✔
708
        auto md = SEP_RE.create_match_data();
2,129✔
709

710
        auto match_res = SEP_RE.capture_from(line.to_string_fragment())
2,129✔
711
                             .into(md)
2,129✔
712
                             .matches(PCRE2_NO_UTF_CHECK)
4,258✔
713
                             .ignore_error();
2,129✔
714
        if (!match_res) {
2,129✔
715
            return scan_no_match{"cannot read separator header"};
2,108✔
716
        }
717

718
        this->clear();
21✔
719

720
        auto sep = from_escaped_string(md[1]->data(), md[1]->length());
21✔
721
        this->blf_separator = intern_string::lookup(sep);
21✔
722

723
        for (++line_iter; line_iter != dst.end(); ++line_iter) {
168✔
724
            auto next_read_result = lf.read_line(line_iter);
147✔
725

726
            if (next_read_result.isErr()) {
147✔
727
                return scan_no_match{"unable to read header line"};
×
728
            }
729

730
            line = next_read_result.unwrap();
147✔
731
            separated_string ss(line.get_data(), line.length());
147✔
732

733
            ss.with_separator(this->blf_separator.get());
147✔
734
            auto iter = ss.begin();
147✔
735

736
            string_fragment directive = *iter;
147✔
737

738
            if (directive.empty() || directive[0] != '#') {
147✔
739
                continue;
×
740
            }
741

742
            ++iter;
147✔
743
            if (iter == ss.end()) {
147✔
744
                continue;
×
745
            }
746

747
            if (directive == "#set_separator") {
147✔
748
                this->blf_set_separator = intern_string::lookup(*iter);
21✔
749
            } else if (directive == "#empty_field") {
126✔
750
                this->blf_empty_field = intern_string::lookup(*iter);
21✔
751
            } else if (directive == "#unset_field") {
105✔
752
                this->blf_unset_field = intern_string::lookup(*iter);
21✔
753
            } else if (directive == "#path") {
84✔
754
                auto full_name = fmt::format(FMT_STRING("bro_{}_log"), *iter);
63✔
755
                this->blf_format_name = intern_string::lookup(full_name);
21✔
756
            } else if (directive == "#fields" && this->blf_field_defs.empty()) {
21✔
757
                do {
21✔
758
                    auto field_name
84✔
759
                        = intern_string::lookup("bro_" + sql_safe_ident(*iter));
760
                    auto common_iter = FIELD_META.find(field_name);
761
                    if (common_iter == FIELD_META.end()) {
593✔
762
                        FIELD_META.emplace(field_name,
593✔
763
                                           logline_value_meta{
593✔
764
                                               field_name,
587✔
765
                                               value_kind_t::VALUE_TEXT,
1,174✔
766
                                           });
767
                    }
768
                    this->blf_field_defs.emplace_back(
769
                        field_name, this->blf_field_defs.size(), this);
770
                    ++iter;
1,186✔
771
                } while (iter != ss.end());
593✔
772
            } else if (directive == "#types") {
593✔
773
                static const char* KNOWN_IDS[] = {
593✔
774
                    "bro_conn_uids",
42✔
775
                    "bro_fuid",
776
                    "bro_host",
777
                    "bro_info_code",
778
                    "bro_method",
779
                    "bro_mime_type",
780
                    "bro_orig_fuids",
781
                    "bro_parent_fuid",
782
                    "bro_proto",
783
                    "bro_referrer",
784
                    "bro_resp_fuids",
785
                    "bro_service",
786
                    "bro_uid",
787
                    "bro_uri",
788
                    "bro_user_agent",
789
                    "bro_username",
790
                };
791
                static const char* KNOWN_FOREIGN[] = {
792
                    "bro_status_code",
793
                };
794

795
                int numeric_count = 0;
796

797
                do {
21✔
798
                    string_fragment field_type = *iter;
799
                    auto& fd = this->blf_field_defs[iter.index() - 1];
800

593✔
801
                    if (field_type == "time") {
593✔
802
                        fd.with_kind(value_kind_t::VALUE_TIMESTAMP);
803
                    } else if (field_type == "string") {
593✔
804
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
21✔
805
                                                        std::end(KNOWN_IDS),
572✔
806
                                                        fd.fd_meta.lvm_name);
217✔
807
                        fd.with_kind(value_kind_t::VALUE_TEXT, ident);
808
                    } else if (field_type == "count") {
217✔
809
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
217✔
810
                                                        std::end(KNOWN_IDS),
355✔
811
                                                        fd.fd_meta.lvm_name);
109✔
812
                        bool foreign
813
                            = std::binary_search(std::begin(KNOWN_FOREIGN),
109✔
814
                                                 std::end(KNOWN_FOREIGN),
815
                                                 fd.fd_meta.lvm_name);
109✔
816
                        fd.with_kind(
817
                              value_kind_t::VALUE_INTEGER, ident, foreign)
109✔
818
                            .with_numeric_index(numeric_count);
218✔
819
                        numeric_count += 1;
820
                    } else if (field_type == "bool") {
109✔
821
                        fd.with_kind(value_kind_t::VALUE_BOOLEAN);
109✔
822
                    } else if (field_type == "addr") {
246✔
823
                        fd.with_kind(
4✔
824
                            value_kind_t::VALUE_TEXT, true, false, "ipaddress");
242✔
825
                    } else if (field_type == "port") {
42✔
826
                        fd.with_kind(value_kind_t::VALUE_INTEGER, true);
827
                    } else if (field_type == "interval") {
200✔
828
                        fd.with_kind(value_kind_t::VALUE_FLOAT)
42✔
829
                            .with_numeric_index(numeric_count);
158✔
830
                        numeric_count += 1;
4✔
831
                    }
2✔
832

2✔
833
                    ++iter;
834
                } while (iter != ss.end());
835

593✔
836
                this->lf_value_stats.resize(numeric_count);
593✔
837
            }
838
        }
21✔
839

840
        if (!this->blf_format_name.empty() && !this->blf_separator.empty()
147✔
841
            && !this->blf_field_defs.empty())
842
        {
42✔
843
            return this->scan_int(dst, li, sbr, sbc);
42✔
844
        }
845

21✔
846
        this->blf_format_name.clear();
847
        this->lf_value_stats.clear();
848

×
849
        return scan_no_match{};
×
850
    }
851

×
852
    void annotate(logfile* lf,
2,129✔
853
                  uint64_t line_number,
854
                  string_attrs_t& sa,
18,981✔
855
                  logline_value_vector& values,
856
                  bool annotate_module) const override
857
    {
858
        static const intern_string_t TS = intern_string::lookup("bro_ts");
859
        static const intern_string_t UID = intern_string::lookup("bro_uid");
860

18,981✔
861
        auto& sbr = values.lvv_sbr;
18,981✔
862
        separated_string ss(sbr.get_data(), sbr.length());
863

18,981✔
864
        ss.with_separator(this->blf_separator.get());
18,981✔
865

866
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
18,981✔
867
            if (iter.index() >= this->blf_field_defs.size()) {
868
                return;
424,430✔
869
            }
405,652✔
870

203✔
871
            const field_def& fd = this->blf_field_defs[iter.index()];
872
            string_fragment sf = *iter;
873

405,449✔
874
            if (sf == this->blf_empty_field) {
405,449✔
875
                sf.clear();
876
            } else if (sf == this->blf_unset_field) {
405,449✔
877
                sf.invalidate();
18,784✔
878
            }
386,665✔
879

46,084✔
880
            auto lr = line_range(sf.sf_begin, sf.sf_end);
881

882
            if (fd.fd_meta.lvm_name == TS) {
405,449✔
883
                sa.emplace_back(lr, logline::L_TIMESTAMP.value());
884
            } else if (fd.fd_meta.lvm_name == UID) {
405,449✔
885
                sa.emplace_back(lr, logline::L_OPID.value());
18,981✔
886
            }
386,468✔
887

18,981✔
888
            if (lr.is_valid()) {
889
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
890
            } else {
405,449✔
891
                values.lvv_values.emplace_back(fd.fd_meta);
359,365✔
892
            }
893
            values.lvv_values.back().lv_meta.lvm_user_hidden
46,084✔
894
                = fd.fd_root_meta->lvm_user_hidden;
895
        }
405,449✔
896

405,449✔
897
        log_format::annotate(lf, line_number, sa, values, annotate_module);
898
    }
899

18,778✔
900
    /**
     * Look up the statistics slot for the field called `name`.
     *
     * @param name The field name to search for in blf_field_defs.
     * @return A pointer into lf_value_stats for the first field with a
     *   matching name, or nullptr when no field matches or the matching
     *   field has no numeric index.
     */
    const logline_value_stats* stats_for_value(
        const intern_string_t& name) const override
    {
        for (const auto& def : this->blf_field_defs) {
            if (!(def.fd_meta.lvm_name == name)) {
                continue;
            }

            // Only the first match is considered.
            if (def.fd_numeric_index) {
                return &this->lf_value_stats[def.fd_numeric_index.value()];
            }
            return nullptr;
        }

        return nullptr;
    }
918

32✔
919
    bool hide_field(const intern_string_t field_name, bool val) override
920
    {
921
        auto fd_iter = FIELD_META.find(field_name);
2✔
922
        if (fd_iter == FIELD_META.end()) {
923
            return false;
2✔
924
        }
2✔
925

×
926
        fd_iter->second.lvm_user_hidden = val;
927

928
        return true;
2✔
929
    }
930

2✔
931
    std::map<intern_string_t, logline_value_meta> get_field_states() override
932
    {
933
        std::map<intern_string_t, logline_value_meta> retval;
17✔
934

935
        for (const auto& fd : FIELD_META) {
17✔
936
            retval.emplace(fd.first, fd.second);
937
        }
75✔
938

58✔
939
        return retval;
940
    }
941

17✔
942
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
×
943
    {
944
        auto retval = std::make_shared<bro_log_format>(*this);
21✔
945

946
        retval->lf_specialized = true;
21✔
947
        return retval;
948
    }
21✔
949

42✔
950
    class bro_log_table : public log_format_vtab_impl {
21✔
951
    public:
952
        explicit bro_log_table(const bro_log_format& format)
953
            : log_format_vtab_impl(format), blt_format(format)
954
        {
19✔
955
        }
19✔
956

957
        void get_columns(std::vector<vtab_column>& cols) const override
19✔
958
        {
959
            for (const auto& fd : this->blt_format.blf_field_defs) {
27✔
960
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
961
                    fd.fd_meta.lvm_kind);
794✔
962

767✔
963
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
767✔
964
                                  type_pair.first,
965
                                  fd.fd_collator,
767✔
966
                                  false,
967
                                  "",
767✔
968
                                  type_pair.second);
1,534✔
969
            }
970
        }
971

972
        void get_foreign_keys(
27✔
973
            std::vector<std::string>& keys_inout) const override
974
        {
9✔
975
            this->log_vtab_impl::get_foreign_keys(keys_inout);
976

977
            for (const auto& fd : this->blt_format.blf_field_defs) {
9✔
978
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
979
                    keys_inout.push_back(fd.fd_meta.lvm_name.to_string());
262✔
980
                }
253✔
981
            }
110✔
982
        }
983

984
        const bro_log_format& blt_format;
9✔
985
    };
986

987
    static std::map<intern_string_t, std::shared_ptr<bro_log_table>>&
988
    get_tables()
989
    {
990
        static std::map<intern_string_t, std::shared_ptr<bro_log_table>> retval;
19✔
991

992
        return retval;
19✔
993
    }
994

19✔
995
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
996
    {
997
        if (this->blf_format_name.empty()) {
537✔
998
            return nullptr;
999
        }
537✔
1000

518✔
1001
        std::shared_ptr<bro_log_table> retval = nullptr;
1002

1003
        auto& tables = get_tables();
19✔
1004
        auto iter = tables.find(this->blf_format_name);
1005
        if (iter == tables.end()) {
19✔
1006
            retval = std::make_shared<bro_log_table>(*this);
19✔
1007
            tables[this->blf_format_name] = retval;
19✔
1008
        }
19✔
1009

19✔
1010
        return retval;
1011
    }
1012

19✔
1013
    void get_subline(const logline& ll,
19✔
1014
                     shared_buffer_ref& sbr,
1015
                     bool full_message) override
22,811✔
1016
    {
1017
    }
1018

1019
    intern_string_t blf_format_name;
22,811✔
1020
    intern_string_t blf_separator;
1021
    intern_string_t blf_set_separator;
1022
    intern_string_t blf_empty_field;
1023
    intern_string_t blf_unset_field;
1024
    std::vector<field_def> blf_field_defs;
1025
};
1026

1027
std::unordered_map<const intern_string_t, logline_value_meta>
1028
    bro_log_format::FIELD_META;
1029

1030
struct ws_separated_string {
1031
    const char* ss_str;
1032
    size_t ss_len;
1033

1034
    explicit ws_separated_string(const char* str = nullptr, size_t len = -1)
1035
        : ss_str(str), ss_len(len)
1036
    {
17,409✔
1037
    }
17,409✔
1038

1039
    struct iterator {
17,409✔
1040
        enum class state_t {
1041
            NORMAL,
1042
            QUOTED,
1043
        };
1044

1045
        const ws_separated_string& i_parent;
1046
        const char* i_pos;
1047
        const char* i_next_pos;
1048
        size_t i_index{0};
1049
        state_t i_state{state_t::NORMAL};
1050

1051
        iterator(const ws_separated_string& ss, const char* pos)
1052
            : i_parent(ss), i_pos(pos), i_next_pos(pos)
1053
        {
28,291✔
1054
            this->update();
28,291✔
1055
        }
1056

28,291✔
1057
        void update()
28,291✔
1058
        {
1059
            const auto& ss = this->i_parent;
38,627✔
1060
            bool done = false;
1061

38,627✔
1062
            while (!done && this->i_next_pos < (ss.ss_str + ss.ss_len)) {
38,627✔
1063
                switch (this->i_state) {
1064
                    case state_t::NORMAL:
38,059,025✔
1065
                        if (*this->i_next_pos == '"') {
38,020,398✔
1066
                            this->i_state = state_t::QUOTED;
38,013,948✔
1067
                        } else if (isspace(*this->i_next_pos)) {
38,013,948✔
1068
                            done = true;
244✔
1069
                        }
38,013,704✔
1070
                        break;
22,750✔
1071
                    case state_t::QUOTED:
1072
                        if (*this->i_next_pos == '"') {
38,013,948✔
1073
                            this->i_state = state_t::NORMAL;
6,450✔
1074
                        }
6,450✔
1075
                        break;
244✔
1076
                }
1077
                if (!done) {
6,450✔
1078
                    this->i_next_pos += 1;
1079
                }
38,020,398✔
1080
            }
37,997,648✔
1081
        }
1082

1083
        iterator& operator++()
38,627✔
1084
        {
1085
            const auto& ss = this->i_parent;
10,336✔
1086

1087
            this->i_pos = this->i_next_pos;
10,336✔
1088
            while (this->i_pos < (ss.ss_str + ss.ss_len)
1089
                   && isspace(*this->i_pos))
10,336✔
1090
            {
10,336✔
1091
                this->i_pos += 1;
20,165✔
1092
                this->i_next_pos += 1;
1093
            }
9,829✔
1094
            this->update();
9,829✔
1095
            this->i_index += 1;
1096

10,336✔
1097
            return *this;
10,336✔
1098
        }
1099

10,336✔
1100
        string_fragment operator*()
1101
        {
1102
            const auto& ss = this->i_parent;
24,897✔
1103
            int end = this->i_next_pos - ss.ss_str;
1104

24,897✔
1105
            return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end);
24,897✔
1106
        }
1107

24,897✔
1108
        bool operator==(const iterator& other) const
1109
        {
1110
            return (&this->i_parent == &other.i_parent)
10,882✔
1111
                && (this->i_pos == other.i_pos);
1112
        }
10,882✔
1113

10,882✔
1114
        bool operator!=(const iterator& other) const
1115
        {
1116
            return !(*this == other);
8,531✔
1117
        }
1118

8,531✔
1119
        size_t index() const { return this->i_index; }
1120
    };
1121

15,793✔
1122
    iterator begin() { return {*this, this->ss_str}; }
1123

1124
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
17,409✔
1125
};
1126

10,882✔
1127
class w3c_log_format : public log_format {
1128
public:
1129
    struct field_def {
1130
        const intern_string_t fd_name;
1131
        logline_value_meta fd_meta;
1132
        logline_value_meta* fd_root_meta{nullptr};
1133
        std::string fd_collator;
1134
        std::optional<size_t> fd_numeric_index;
1135

1136
        explicit field_def(const intern_string_t name)
1137
            : fd_name(name), fd_meta(intern_string::lookup(sql_safe_ident(
1138
                                         name.to_string_fragment())),
11✔
1139
                                     value_kind_t::VALUE_TEXT)
22✔
1140
        {
22✔
1141
        }
11✔
1142

1143
        field_def(const intern_string_t name, logline_value_meta meta)
11✔
1144
            : fd_name(name), fd_meta(meta)
1145
        {
58✔
1146
        }
58✔
1147

1148
        field_def(size_t col,
58✔
1149
                  const char* name,
1150
                  value_kind_t kind,
16,320✔
1151
                  bool ident = false,
1152
                  bool foreign_key = false,
1153
                  std::string coll = "")
1154
            : fd_name(intern_string::lookup(name)),
1155
              fd_meta(
1156
                  intern_string::lookup(sql_safe_ident(string_fragment(name))),
16,320✔
1157
                  kind,
32,640✔
1158
                  logline_value_meta::table_column{col}),
32,640✔
1159
              fd_collator(std::move(coll))
1160
        {
16,320✔
1161
            this->fd_meta.lvm_identifier = ident;
16,320✔
1162
            this->fd_meta.lvm_foreign_key = foreign_key;
1163
        }
16,320✔
1164

16,320✔
1165
        field_def& with_kind(value_kind_t kind,
16,320✔
1166
                             bool identifier = false,
1167
                             const std::string& collator = "")
1168
        {
1169
            this->fd_meta.lvm_kind = kind;
1170
            this->fd_meta.lvm_identifier = identifier;
1171
            this->fd_collator = collator;
1172
            return *this;
1173
        }
1174

1175
        field_def& with_numeric_index(int index)
1176
        {
1177
            this->fd_numeric_index = index;
27✔
1178
            return *this;
1179
        }
27✔
1180
    };
27✔
1181

1182
    static std::unordered_map<const intern_string_t, logline_value_meta>
1183
        FIELD_META;
1184

1185
    struct field_to_struct_t {
1186
        field_to_struct_t(const char* prefix, const char* struct_name)
1187
            : fs_prefix(prefix),
1188
              fs_struct_name(intern_string::lookup(struct_name))
4,080✔
1189
        {
4,080✔
1190
        }
4,080✔
1191

1192
        const char* fs_prefix;
4,080✔
1193
        intern_string_t fs_struct_name;
1194
    };
1195

1196
    static const std::vector<field_def> KNOWN_FIELDS;
1197
    const static std::vector<field_to_struct_t> KNOWN_STRUCT_FIELDS;
1198

1199
    w3c_log_format()
1200
    {
1201
        this->lf_is_self_describing = true;
632✔
1202
        this->lf_time_ordered = false;
632✔
1203
        this->lf_structured = true;
632✔
1204
    }
632✔
1205

632✔
1206
    /**
     * @return wlf_format_name if it has been set, otherwise the generic
     *   name "w3c".
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t DEFAULT_NAME(
            intern_string::lookup("w3c"));

        if (this->wlf_format_name.empty()) {
            return DEFAULT_NAME;
        }

        return this->wlf_format_name;
    }
1212

12,150✔
1213
    void clear() override
1214
    {
1215
        this->log_format::clear();
12,502✔
1216
        this->wlf_time_scanner.clear();
1217
        this->wlf_format_name.clear();
12,502✔
1218
        this->wlf_field_defs.clear();
12,502✔
1219
    }
12,502✔
1220

12,502✔
1221
    /**
     * Scan a single line using the field definitions in wlf_field_defs.
     *
     * Any token that starts with '#' causes the line to be recorded with
     * LEVEL_IGNORE; a "#Date:" token additionally updates the base time of
     * both time scanners when the remainder of the line parses as a date.
     * For other lines, the date, time and status-code columns determine the
     * timestamp and level, and numeric columns are added to lf_value_stats.
     *
     * @param dst Receives the new logline on a match.
     * @param li Supplies the file offset of the line.
     * @param sbr The contents of the line.
     * @return scan_match{2000} when the line was recorded, scan_no_match
     *   when no time column could be parsed.
     */
    scan_result_t scan_int(std::vector<logline>& dst,
                           const line_info& li,
                           shared_buffer_ref& sbr)
    {
        static const intern_string_t F_DATE = intern_string::lookup("date");
        static const intern_string_t F_DATE_LOCAL
            = intern_string::lookup("date-local");
        static const intern_string_t F_DATE_UTC
            = intern_string::lookup("date-UTC");
        static const intern_string_t F_TIME = intern_string::lookup("time");
        static const intern_string_t F_TIME_LOCAL
            = intern_string::lookup("time-local");
        static const intern_string_t F_TIME_UTC
            = intern_string::lookup("time-UTC");
        static const intern_string_t F_STATUS_CODE
            = intern_string::lookup("sc-status");

        ws_separated_string tokens(sbr.get_data(), sbr.length());
        struct timeval date_tv{0, 0};
        struct timeval time_tv{0, 0};
        struct exttm date_tm;
        struct exttm time_tm;
        bool found_date = false;
        bool found_time = false;
        log_level_t level = LEVEL_INFO;

        for (auto tok = tokens.begin(); tok != tokens.end(); ++tok) {
            if (tok.index() >= this->wlf_field_defs.size()) {
                // More tokens than declared fields.
                level = LEVEL_INVALID;
                break;
            }

            const auto& def = this->wlf_field_defs[tok.index()];
            string_fragment text = *tok;

            if (text.startswith("#")) {
                if (text == "#Date:") {
                    auto remainder_opt
                        = sbr.to_string_fragment().consume_n(text.length());

                    if (remainder_opt) {
                        auto remainder = remainder_opt.value().trim();
                        date_time_scanner dts;
                        struct exttm tm;
                        struct timeval tv;

                        if (dts.scan(remainder.data(),
                                     remainder.length(),
                                     nullptr,
                                     &tm,
                                     tv))
                        {
                            this->lf_date_time.set_base_time(tv.tv_sec,
                                                             tm.et_tm);
                            this->wlf_time_scanner.set_base_time(tv.tv_sec,
                                                                 tm.et_tm);
                        }
                    }
                }
                dst.emplace_back(
                    li.li_file_range.fr_offset, 0, 0, LEVEL_IGNORE, 0);
                return scan_match{2000};
            }

            // Strip surrounding quotes and blanks before interpreting.
            text = text.trim("\" \t");

            const bool is_date_field = F_DATE == def.fd_name
                || F_DATE_LOCAL == def.fd_name || F_DATE_UTC == def.fd_name;
            const bool is_time_field = F_TIME == def.fd_name
                || F_TIME_LOCAL == def.fd_name || F_TIME_UTC == def.fd_name;

            if (is_date_field) {
                if (this->lf_date_time.scan(text.data(),
                                            text.length(),
                                            nullptr,
                                            &date_tm,
                                            date_tv))
                {
                    this->lf_timestamp_flags |= date_tm.et_flags;
                    found_date = true;
                }
            } else if (is_time_field) {
                if (this->wlf_time_scanner.scan(text.data(),
                                                text.length(),
                                                nullptr,
                                                &time_tm,
                                                time_tv))
                {
                    this->lf_timestamp_flags |= time_tm.et_flags;
                    found_time = true;
                }
            } else if (F_STATUS_CODE == def.fd_name) {
                // A status whose first character is '4' or above is
                // reported as an error.
                if (!text.empty() && text[0] >= '4') {
                    level = LEVEL_ERROR;
                }
            }

            if (def.fd_numeric_index) {
                const auto kind = def.fd_meta.lvm_kind;

                if (kind == value_kind_t::VALUE_INTEGER
                    || kind == value_kind_t::VALUE_FLOAT)
                {
                    auto number
                        = scn::scan_value<double>(text.to_string_view());

                    if (number) {
                        this->lf_value_stats[def.fd_numeric_index.value()]
                            .add_value(number.value());
                    }
                }
            }
        }

        if (!found_time) {
            return scan_no_match{};
        }

        struct exttm combined_tm = time_tm;

        if (found_date) {
            // Take the calendar date from the date column.
            combined_tm.et_tm.tm_year = date_tm.et_tm.tm_year;
            combined_tm.et_tm.tm_mday = date_tm.et_tm.tm_mday;
            combined_tm.et_tm.tm_mon = date_tm.et_tm.tm_mon;
            combined_tm.et_tm.tm_wday = date_tm.et_tm.tm_wday;
            combined_tm.et_tm.tm_yday = date_tm.et_tm.tm_yday;
        }

        struct timeval line_tv = combined_tm.to_timeval();

        if (!this->lf_specialized) {
            for (auto& earlier : dst) {
                earlier.set_ignore(true);
            }
        }
        dst.emplace_back(li.li_file_range.fr_offset, line_tv, level, 0);

        return scan_match{2000};
    }
1352

5✔
1353
    scan_result_t scan(logfile& lf,
1354
                       std::vector<logline>& dst,
1355
                       const line_info& li,
10,181✔
1356
                       shared_buffer_ref& sbr,
1357
                       scan_batch_context& sbc) override
1358
    {
1359
        static const auto* W3C_LOG_NAME = intern_string::lookup("w3c_log");
1360
        static const auto* X_FIELDS_NAME = intern_string::lookup("x_fields");
1361
        static auto X_FIELDS_IDX = 0;
10,181✔
1362

10,181✔
1363
        if (li.li_partial) {
1364
            return scan_incomplete{};
1365
        }
10,181✔
1366

13✔
1367
        if (dst.empty()) {
1368
            auto file_options = lf.get_file_options();
1369

10,168✔
1370
            if (file_options) {
966✔
1371
                this->lf_date_time.dts_default_zone
1372
                    = file_options->second.fo_default_zone.pp_value;
966✔
1373
            } else {
1374
                this->lf_date_time.dts_default_zone = nullptr;
35✔
1375
            }
1376
        }
931✔
1377

1378
        if (!this->wlf_format_name.empty()) {
966✔
1379
            return this->scan_int(dst, li, sbr);
1380
        }
10,168✔
1381

296✔
1382
        if (dst.empty() || dst.size() > 20 || sbr.empty()
1383
            || sbr.get_data()[0] == '#')
1384
        {
18,778✔
1385
            return scan_no_match{};
18,778✔
1386
        }
1387

7,547✔
1388
        this->clear();
1389

1390
        for (auto line_iter = dst.begin(); line_iter != dst.end(); ++line_iter)
2,325✔
1391
        {
1392
            auto next_read_result = lf.read_line(line_iter);
19,188✔
1393

1394
            if (next_read_result.isErr()) {
16,863✔
1395
                return scan_no_match{"unable to read first line"};
1396
            }
16,863✔
1397

×
1398
            auto line = next_read_result.unwrap();
1399
            ws_separated_string ss(line.get_data(), line.length());
1400
            auto iter = ss.begin();
16,863✔
1401
            const auto directive = *iter;
16,863✔
1402

16,863✔
1403
            if (directive.empty() || directive[0] != '#') {
16,863✔
1404
                continue;
1405
            }
16,863✔
1406

14,512✔
1407
            ++iter;
1408
            if (iter == ss.end()) {
1409
                continue;
2,351✔
1410
            }
2,351✔
1411

9✔
1412
            if (directive == "#Date:") {
1413
                date_time_scanner dts;
1414
                struct exttm tm;
2,342✔
1415
                struct timeval tv;
8✔
1416

8✔
1417
                if (dts.scan(line.get_data_at(directive.length() + 1),
1418
                             line.length() - directive.length() - 1,
1419
                             nullptr,
8✔
1420
                             &tm,
8✔
1421
                             tv))
1422
                {
1423
                    this->lf_date_time.set_base_time(tv.tv_sec, tm.et_tm);
1424
                    this->wlf_time_scanner.set_base_time(tv.tv_sec, tm.et_tm);
1425
                }
7✔
1426
            } else if (directive == "#Fields:" && this->wlf_field_defs.empty())
7✔
1427
            {
1428
                int numeric_count = 0;
2,334✔
1429

1430
                do {
14✔
1431
                    auto sf = (*iter).trim(")");
1432

1433
                    auto field_iter = std::find_if(
138✔
1434
                        begin(KNOWN_FIELDS),
1435
                        end(KNOWN_FIELDS),
138✔
1436
                        [&sf](auto elem) { return sf == elem.fd_name; });
1437
                    if (field_iter != end(KNOWN_FIELDS)) {
1438
                        this->wlf_field_defs.emplace_back(*field_iter);
1,612✔
1439
                        auto& fd = this->wlf_field_defs.back();
138✔
1440
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
69✔
1441
                        if (common_iter == FIELD_META.end()) {
69✔
1442
                            auto emp_res = FIELD_META.emplace(
69✔
1443
                                fd.fd_meta.lvm_name, fd.fd_meta);
69✔
1444
                            common_iter = emp_res.first;
68✔
1445
                        }
68✔
1446
                        fd.fd_root_meta = &common_iter->second;
68✔
1447
                    } else if (sf == "date" || sf == "time") {
1448
                        this->wlf_field_defs.emplace_back(
69✔
1449
                            intern_string::lookup(sf));
69✔
1450
                        auto& fd = this->wlf_field_defs.back();
22✔
1451
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
11✔
1452
                        if (common_iter == FIELD_META.end()) {
11✔
1453
                            auto emp_res = FIELD_META.emplace(
11✔
1454
                                fd.fd_meta.lvm_name, fd.fd_meta);
11✔
1455
                            common_iter = emp_res.first;
11✔
1456
                        }
11✔
1457
                        fd.fd_root_meta = &common_iter->second;
11✔
1458
                    } else {
1459
                        const auto fs_iter = std::find_if(
11✔
1460
                            begin(KNOWN_STRUCT_FIELDS),
1461
                            end(KNOWN_STRUCT_FIELDS),
58✔
1462
                            [&sf](auto elem) {
1463
                                return sf.startswith(elem.fs_prefix);
1464
                            });
193✔
1465
                        if (fs_iter != end(KNOWN_STRUCT_FIELDS)) {
193✔
1466
                            const intern_string_t field_name
1467
                                = intern_string::lookup(sf.substr(3));
58✔
1468
                            this->wlf_field_defs.emplace_back(
1469
                                field_name,
13✔
1470
                                logline_value_meta(
13✔
1471
                                    field_name,
1472
                                    value_kind_t::VALUE_TEXT,
26✔
1473
                                    logline_value_meta::table_column{
1474
                                        KNOWN_FIELDS.size() + 1
1475
                                        + std::distance(
×
1476
                                            begin(KNOWN_STRUCT_FIELDS),
13✔
1477
                                            fs_iter)},
13✔
1478
                                    this)
1479
                                    .with_struct_name(fs_iter->fs_struct_name));
1480
                        } else {
26✔
1481
                            const intern_string_t field_name
13✔
1482
                                = intern_string::lookup(sf);
1483
                            this->wlf_field_defs.emplace_back(
1484
                                field_name,
45✔
1485
                                logline_value_meta(
45✔
1486
                                    field_name,
1487
                                    value_kind_t::VALUE_TEXT,
90✔
1488
                                    logline_value_meta::table_column{
1489
                                        KNOWN_FIELDS.size() + X_FIELDS_IDX},
1490
                                    this)
×
1491
                                    .with_struct_name(X_FIELDS_NAME));
45✔
1492
                        }
90✔
1493
                    }
1494
                    auto& fd = this->wlf_field_defs.back();
1495
                    fd.fd_meta.lvm_format = std::make_optional(this);
1496
                    switch (fd.fd_meta.lvm_kind) {
138✔
1497
                        case value_kind_t::VALUE_FLOAT:
138✔
1498
                        case value_kind_t::VALUE_INTEGER:
138✔
1499
                            fd.with_numeric_index(numeric_count);
27✔
1500
                            numeric_count += 1;
1501
                            break;
27✔
1502
                        default:
27✔
1503
                            break;
27✔
1504
                    }
111✔
1505

111✔
1506
                    ++iter;
1507
                } while (iter != ss.end());
1508

138✔
1509
                this->wlf_format_name = W3C_LOG_NAME;
138✔
1510
                this->lf_value_stats.resize(numeric_count);
1511
            }
14✔
1512
        }
14✔
1513

1514
        if (!this->wlf_format_name.empty() && !this->wlf_field_defs.empty()) {
31,384✔
1515
            return this->scan_int(dst, li, sbr);
1516
        }
2,325✔
1517

14✔
1518
        this->wlf_format_name.clear();
1519
        this->lf_value_stats.clear();
1520

2,311✔
1521
        return scan_no_match{};
2,311✔
1522
    }
1523

2,311✔
1524
    void annotate(logfile* lf,
1525
                  uint64_t line_number,
1526
                  string_attrs_t& sa,
236✔
1527
                  logline_value_vector& values,
1528
                  bool annotate_module) const override
1529
    {
1530
        auto& sbr = values.lvv_sbr;
1531
        ws_separated_string ss(sbr.get_data(), sbr.length());
1532

236✔
1533
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
236✔
1534
            string_fragment sf = *iter;
1535

4,057✔
1536
            if (iter.index() >= this->wlf_field_defs.size()) {
3,821✔
1537
                sa.emplace_back(line_range{sf.sf_begin, -1},
1538
                                SA_INVALID.value("extra fields detected"));
3,821✔
1539
                return;
×
1540
            }
×
1541

×
1542
            const auto& fd = this->wlf_field_defs[iter.index()];
1543

1544
            if (sf == "-") {
3,821✔
1545
                sf.invalidate();
1546
            }
3,821✔
1547

659✔
1548
            auto lr = line_range(sf.sf_begin, sf.sf_end);
1549

1550
            if (lr.is_valid()) {
3,821✔
1551
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
1552
                if (sf.startswith("\"")) {
3,821✔
1553
                    auto& meta = values.lvv_values.back().lv_meta;
3,162✔
1554

3,162✔
1555
                    if (meta.lvm_kind == value_kind_t::VALUE_TEXT) {
28✔
1556
                        meta.lvm_kind = value_kind_t::VALUE_W3C_QUOTED;
1557
                    } else {
28✔
1558
                        meta.lvm_kind = value_kind_t::VALUE_NULL;
26✔
1559
                    }
1560
                }
2✔
1561
            } else {
1562
                values.lvv_values.emplace_back(fd.fd_meta);
1563
            }
1564
            if (fd.fd_root_meta != nullptr) {
659✔
1565
                values.lvv_values.back().lv_meta.lvm_user_hidden
1566
                    = fd.fd_root_meta->lvm_user_hidden;
3,821✔
1567
            }
3,125✔
1568
        }
3,125✔
1569
        log_format::annotate(lf, line_number, sa, values, annotate_module);
1570
    }
1571

236✔
1572
    /**
     * Look up the statistics slot for the field called `name`.
     *
     * @param name The field name to search for in `wlf_field_defs`.
     * @return A pointer into `lf_value_stats` for the first definition with
     *   a matching name, or nullptr if there is no such definition or it
     *   has no numeric index.
     */
    const logline_value_stats* stats_for_value(
        const intern_string_t& name) const override
    {
        for (const auto& def : this->wlf_field_defs) {
            if (!(def.fd_meta.lvm_name == name)) {
                continue;
            }

            // Only the first match is considered.
            if (!def.fd_numeric_index) {
                return nullptr;
            }
            return &this->lf_value_stats[def.fd_numeric_index.value()];
        }

        return nullptr;
    }
1590

×
1591
    bool hide_field(const intern_string_t field_name, bool val) override
1592
    {
1593
        auto fd_iter = FIELD_META.find(field_name);
×
1594
        if (fd_iter == FIELD_META.end()) {
1595
            return false;
×
1596
        }
×
1597

×
1598
        fd_iter->second.lvm_user_hidden = val;
1599

1600
        return true;
×
1601
    }
1602

×
1603
    std::map<intern_string_t, logline_value_meta> get_field_states() override
1604
    {
1605
        std::map<intern_string_t, logline_value_meta> retval;
17✔
1606

1607
        for (const auto& fd : FIELD_META) {
17✔
1608
            retval.emplace(fd.first, fd.second);
1609
        }
17✔
1610

×
1611
        return retval;
1612
    }
1613

17✔
1614
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
×
1615
    {
1616
        auto retval = std::make_shared<w3c_log_format>(*this);
9✔
1617

1618
        retval->lf_specialized = true;
9✔
1619
        return retval;
1620
    }
9✔
1621

18✔
1622
    class w3c_log_table : public log_format_vtab_impl {
9✔
1623
    public:
1624
        explicit w3c_log_table(const w3c_log_format& format)
1625
            : log_format_vtab_impl(format), wlt_format(format)
1626
        {
6✔
1627
        }
6✔
1628

1629
        void get_columns(std::vector<vtab_column>& cols) const override
6✔
1630
        {
1631
            for (const auto& fd : KNOWN_FIELDS) {
9✔
1632
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
1633
                    fd.fd_meta.lvm_kind);
153✔
1634

144✔
1635
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
144✔
1636
                                  type_pair.first,
1637
                                  fd.fd_collator,
144✔
1638
                                  false,
1639
                                  "",
144✔
1640
                                  type_pair.second);
288✔
1641
            }
1642
            cols.emplace_back("x_fields");
1643
            cols.back().with_comment(
1644
                "A JSON-object that contains fields that are not first-class "
9✔
1645
                "columns");
9✔
1646
            for (const auto& fs : KNOWN_STRUCT_FIELDS) {
1647
                cols.emplace_back(fs.fs_struct_name.to_string());
1648
            }
45✔
1649
        };
36✔
1650

1651
        void get_foreign_keys(
9✔
1652
            std::vector<std::string>& keys_inout) const override
1653
        {
3✔
1654
            this->log_vtab_impl::get_foreign_keys(keys_inout);
1655

1656
            for (const auto& fd : KNOWN_FIELDS) {
3✔
1657
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
1658
                    keys_inout.push_back(fd.fd_meta.lvm_name.to_string());
51✔
1659
                }
48✔
1660
            }
30✔
1661
        }
1662

1663
        const w3c_log_format& wlt_format;
3✔
1664
    };
1665

1666
    static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>&
1667
    get_tables()
1668
    {
1669
        static std::map<intern_string_t, std::shared_ptr<w3c_log_table>> retval;
6✔
1670

1671
        return retval;
6✔
1672
    }
1673

6✔
1674
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
1675
    {
1676
        if (this->wlf_format_name.empty()) {
524✔
1677
            return nullptr;
1678
        }
524✔
1679

518✔
1680
        std::shared_ptr<w3c_log_table> retval = nullptr;
1681

1682
        auto& tables = get_tables();
6✔
1683
        auto iter = tables.find(this->wlf_format_name);
1684
        if (iter == tables.end()) {
6✔
1685
            retval = std::make_shared<w3c_log_table>(*this);
6✔
1686
            tables[this->wlf_format_name] = retval;
6✔
1687
        }
6✔
1688

6✔
1689
        return retval;
1690
    }
1691

6✔
1692
    void get_subline(const logline& ll,
6✔
1693
                     shared_buffer_ref& sbr,
1694
                     bool full_message) override
341✔
1695
    {
1696
    }
1697

1698
    date_time_scanner wlf_time_scanner;
341✔
1699
    intern_string_t wlf_format_name;
1700
    std::vector<field_def> wlf_field_defs;
1701
};
1702

1703
std::unordered_map<const intern_string_t, logline_value_meta>
1704
    w3c_log_format::FIELD_META;
1705

1706
static size_t KNOWN_FIELD_INDEX = 0;
1707
const std::vector<w3c_log_format::field_def> w3c_log_format::KNOWN_FIELDS = {
1708
    {
1709
        KNOWN_FIELD_INDEX++,
1710
        "cs-method",
1711
        value_kind_t::VALUE_TEXT,
1712
        true,
1713
    },
1714
    {
1715
        KNOWN_FIELD_INDEX++,
1716
        "c-ip",
1717
        value_kind_t::VALUE_TEXT,
1718
        true,
1719
        false,
1720
        "ipaddress",
1721
    },
1722
    {
1723
        KNOWN_FIELD_INDEX++,
1724
        "cs-bytes",
1725
        value_kind_t::VALUE_INTEGER,
1726
        false,
1727
    },
1728
    {
1729
        KNOWN_FIELD_INDEX++,
1730
        "cs-host",
1731
        value_kind_t::VALUE_TEXT,
1732
        true,
1733
    },
1734
    {
1735
        KNOWN_FIELD_INDEX++,
1736
        "cs-uri-stem",
1737
        value_kind_t::VALUE_TEXT,
1738
        true,
1739
        false,
1740
        "naturalnocase",
1741
    },
1742
    {
1743
        KNOWN_FIELD_INDEX++,
1744
        "cs-uri-query",
1745
        value_kind_t::VALUE_TEXT,
1746
        false,
1747
    },
1748
    {
1749
        KNOWN_FIELD_INDEX++,
1750
        "cs-username",
1751
        value_kind_t::VALUE_TEXT,
1752
        false,
1753
    },
1754
    {
1755
        KNOWN_FIELD_INDEX++,
1756
        "cs-version",
1757
        value_kind_t::VALUE_TEXT,
1758
        true,
1759
    },
1760
    {
1761
        KNOWN_FIELD_INDEX++,
1762
        "s-ip",
1763
        value_kind_t::VALUE_TEXT,
1764
        true,
1765
        false,
1766
        "ipaddress",
1767
    },
1768
    {
1769
        KNOWN_FIELD_INDEX++,
1770
        "s-port",
1771
        value_kind_t::VALUE_INTEGER,
1772
        true,
1773
    },
1774
    {
1775
        KNOWN_FIELD_INDEX++,
1776
        "s-computername",
1777
        value_kind_t::VALUE_TEXT,
1778
        true,
1779
    },
1780
    {
1781
        KNOWN_FIELD_INDEX++,
1782
        "s-sitename",
1783
        value_kind_t::VALUE_TEXT,
1784
        true,
1785
    },
1786
    {
1787
        KNOWN_FIELD_INDEX++,
1788
        "sc-bytes",
1789
        value_kind_t::VALUE_INTEGER,
1790
        false,
1791
    },
1792
    {
1793
        KNOWN_FIELD_INDEX++,
1794
        "sc-status",
1795
        value_kind_t::VALUE_INTEGER,
1796
        false,
1797
        true,
1798
    },
1799
    {
1800
        KNOWN_FIELD_INDEX++,
1801
        "sc-substatus",
1802
        value_kind_t::VALUE_INTEGER,
1803
        false,
1804
    },
1805
    {
1806
        KNOWN_FIELD_INDEX++,
1807
        "time-taken",
1808
        value_kind_t::VALUE_FLOAT,
1809
        false,
1810
    },
1811
};
1812

1813
const std::vector<w3c_log_format::field_to_struct_t>
1814
    w3c_log_format::KNOWN_STRUCT_FIELDS = {
1815
        {"cs(", "cs_headers"},
1816
        {"sc(", "sc_headers"},
1817
        {"rs(", "rs_headers"},
1818
        {"sr(", "sr_headers"},
1819
};
1820

1821
struct logfmt_pair_handler {
1822
    explicit logfmt_pair_handler(date_time_scanner& dts) : lph_dt_scanner(dts)
1823
    {
1824
    }
10,177✔
1825

1826
    bool process_value(const string_fragment& value_frag)
10,177✔
1827
    {
1828
        if (this->lph_key_frag == "time" || this->lph_key_frag == "ts") {
1,969✔
1829
            if (!this->lph_dt_scanner.scan(value_frag.data(),
1830
                                           value_frag.length(),
1,969✔
1831
                                           nullptr,
18✔
1832
                                           &this->lph_time_tm,
18✔
1833
                                           this->lph_tv))
1834
            {
1835
                return false;
18✔
1836
            }
1837
            this->lph_found_time = true;
×
1838
        } else if (this->lph_key_frag == "level") {
1839
            this->lph_level
18✔
1840
                = string2level(value_frag.data(), value_frag.length());
1,951✔
1841
        }
1842
        return true;
28✔
1843
    }
1844

1,969✔
1845
    date_time_scanner& lph_dt_scanner;
1846
    bool lph_found_time{false};
1847
    struct exttm lph_time_tm {};
1848
    struct timeval lph_tv {
1849
        0, 0
1850
    };
1851
    log_level_t lph_level{log_level_t::LEVEL_INFO};
1852
    string_fragment lph_key_frag{""};
1853
};
1854

1855
class logfmt_format : public log_format {
1856
public:
1857
    /**
     * @return The interned name of this format, "logfmt_log".
     */
    const intern_string_t get_name() const override
    {
        // Interned once, on first use.
        static const intern_string_t FORMAT_NAME
            = intern_string::lookup("logfmt_log");

        return FORMAT_NAME;
    }
1863

12,403✔
1864
    class logfmt_log_table : public log_format_vtab_impl {
1865
    public:
1866
        logfmt_log_table(const log_format& format)
1867
            : log_format_vtab_impl(format)
1868
        {
518✔
1869
        }
518✔
1870

1871
        void get_columns(std::vector<vtab_column>& cols) const override
518✔
1872
        {
1873
            static const auto FIELDS = std::string("fields");
520✔
1874

1875
            cols.emplace_back(FIELDS);
520✔
1876
        }
1877
    };
520✔
1878

520✔
1879
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
1880
    {
1881
        static auto retval = std::make_shared<logfmt_log_table>(*this);
518✔
1882

1883
        return retval;
518✔
1884
    }
1885

518✔
1886
    /**
     * Try to recognize `sbr` as a logfmt line by stepping through its
     * key/value pairs and looking for a timestamp.
     *
     * When `dst` is empty the scanner's default time zone is first reset
     * from the file's options (or cleared if there are none).
     *
     * @param lf The file being scanned; consulted for its options.
     * @param dst Receives a new logline when a timestamp was found.
     * @param li Supplies the file offset of the line.
     * @param sbr The line contents.
     * @param sbc Not used by this implementation.
     * @return scan_match{2000} if a "time"/"ts" value was scanned,
     *   scan_no_match{} otherwise.
     */
    scan_result_t scan(logfile& lf,
                       std::vector<logline>& dst,
                       const line_info& li,
                       shared_buffer_ref& sbr,
                       scan_batch_context& sbc) override
    {
        auto pair_parser = logfmt::parser(sbr.to_string_fragment());
        logfmt_pair_handler handler(this->lf_date_time);

        if (dst.empty()) {
            auto file_options = lf.get_file_options();

            if (!file_options) {
                this->lf_date_time.dts_default_zone = nullptr;
            } else {
                this->lf_date_time.dts_default_zone
                    = file_options->second.fo_default_zone.pp_value;
            }
        }

        // NOTE(review): every handler below returns the new value of
        // `done`.  The scalar handlers return process_value() directly, and
        // that is true on success, so the loop ends after the first scalar
        // pair that is processed successfully -- confirm this is intended.
        bool done = false;

        do {
            auto step_result = pair_parser.step();

            done = step_result.match(
                [](const logfmt::parser::end_of_input&) { return true; },
                [&handler](const logfmt::parser::kvpair& kvp) {
                    handler.lph_key_frag = kvp.first;

                    return kvp.second.match(
                        [](const logfmt::parser::bool_value& bv) {
                            // booleans carry neither time nor level
                            return false;
                        },
                        [&handler](const logfmt::parser::float_value& fv) {
                            return handler.process_value(fv.fv_str_value);
                        },
                        [&handler](const logfmt::parser::int_value& iv) {
                            return handler.process_value(iv.iv_str_value);
                        },
                        [&handler](const logfmt::parser::quoted_value& qv) {
                            // Decode the quoted text as JSON; each decoded
                            // string is passed to process_value().
                            yajl_callbacks json_cb;

                            memset(&json_cb, 0, sizeof(json_cb));
                            json_cb.yajl_string
                                = +[](void* ctx,
                                      const unsigned char* str,
                                      size_t len) -> int {
                                auto& pair_handler
                                    = *((logfmt_pair_handler*) ctx);
                                string_fragment decoded{str, 0, (int) len};

                                return pair_handler.process_value(decoded);
                            };

                            auto_mem<yajl_handle_t> json_handle(yajl_free);

                            json_handle
                                = yajl_alloc(&json_cb, nullptr, &handler);

                            const bool parsed_ok
                                = yajl_parse(json_handle,
                                             (const unsigned char*)
                                                 qv.qv_value.data(),
                                             qv.qv_value.length())
                                    == yajl_status_ok
                                && yajl_complete_parse(json_handle)
                                    == yajl_status_ok;

                            if (parsed_ok) {
                                return false;
                            }

                            // Fall back to the raw text with the first and
                            // last characters dropped.
                            log_debug("json parsing failed");
                            string_fragment unquoted{
                                qv.qv_value.sf_string,
                                qv.qv_value.sf_begin + 1,
                                qv.qv_value.sf_end - 1,
                            };

                            return handler.process_value(unquoted);
                        },
                        [&handler](const logfmt::parser::unquoted_value& uv) {
                            return handler.process_value(uv.uv_value);
                        });
                },
                [](const logfmt::parser::error& err) {
                    // Parse errors are deliberately not logged here:
                    // log_error("logfmt parse error: %s", err.e_msg.c_str());
                    return true;
                });
        } while (!done);

        if (!handler.lph_found_time) {
            return scan_no_match{};
        }

        dst.emplace_back(
            li.li_file_range.fr_offset, handler.lph_tv, handler.lph_level);

        return scan_match{2000};
    }
1979

20,354✔
1980
    void annotate(logfile* lf,
×
1981
                  uint64_t line_number,
1982
                  string_attrs_t& sa,
6✔
1983
                  logline_value_vector& values,
1984
                  bool annotate_module) const override
1985
    {
1986
        static const auto FIELDS_NAME = intern_string::lookup("fields");
1987

1988
        auto& sbr = values.lvv_sbr;
6✔
1989
        auto p = logfmt::parser(sbr.to_string_fragment());
1990
        bool done = false;
6✔
1991

6✔
1992
        while (!done) {
6✔
1993
            auto parse_result = p.step();
1994

52✔
1995
            done = parse_result.match(
46✔
1996
                [](const logfmt::parser::end_of_input&) { return true; },
1997
                [this, &sa, &values](const logfmt::parser::kvpair& kvp) {
46✔
1998
                    auto value_frag = kvp.second.match(
6✔
1999
                        [this, &kvp, &values](
96✔
2000
                            const logfmt::parser::bool_value& bv) {
40✔
2001
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2002
                                                              kvp.first),
×
2003
                                                          value_kind_t::
×
2004
                                                              VALUE_INTEGER,
×
2005
                                                          logline_value_meta::
2006
                                                              table_column{0},
2007
                                                          (log_format*) this}
2008
                                           .with_struct_name(FIELDS_NAME);
×
2009
                            values.lvv_values.emplace_back(lvm, bv.bv_value);
×
2010

×
2011
                            return bv.bv_str_value;
×
2012
                        },
2013
                        [this, &kvp, &values](
×
2014
                            const logfmt::parser::int_value& iv) {
×
2015
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2016
                                                              kvp.first),
×
2017
                                                          value_kind_t::
×
2018
                                                              VALUE_INTEGER,
×
2019
                                                          logline_value_meta::
2020
                                                              table_column{0},
2021
                                                          (log_format*) this}
2022
                                           .with_struct_name(FIELDS_NAME);
×
2023
                            values.lvv_values.emplace_back(lvm, iv.iv_value);
×
2024

×
2025
                            return iv.iv_str_value;
×
2026
                        },
2027
                        [this, &kvp, &values](
×
2028
                            const logfmt::parser::float_value& fv) {
×
2029
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2030
                                                              kvp.first),
×
2031
                                                          value_kind_t::
×
2032
                                                              VALUE_INTEGER,
×
2033
                                                          logline_value_meta::
2034
                                                              table_column{0},
2035
                                                          (log_format*) this}
2036
                                           .with_struct_name(FIELDS_NAME);
×
2037
                            values.lvv_values.emplace_back(lvm, fv.fv_value);
×
2038

×
2039
                            return fv.fv_str_value;
×
2040
                        },
2041
                        [](const logfmt::parser::quoted_value& qv) {
×
2042
                            return qv.qv_value;
×
2043
                        },
13✔
2044
                        [](const logfmt::parser::unquoted_value& uv) {
13✔
2045
                            return uv.uv_value;
2046
                        });
27✔
2047
                    auto value_lr
27✔
2048
                        = line_range{value_frag.sf_begin, value_frag.sf_end};
2049

2050
                    if (kvp.first == "time" || kvp.first == "ts") {
40✔
2051
                        sa.emplace_back(value_lr, logline::L_TIMESTAMP.value());
2052
                    } else if (kvp.first == "level") {
40✔
2053
                    } else if (kvp.first == "msg") {
6✔
2054
                        sa.emplace_back(value_lr, SA_BODY.value());
34✔
2055
                    } else if (kvp.second.is<logfmt::parser::quoted_value>()
28✔
2056
                               || kvp.second
6✔
2057
                                      .is<logfmt::parser::unquoted_value>())
22✔
2058
                    {
43✔
2059
                        auto lvm
21✔
2060
                            = logline_value_meta{intern_string::lookup(
2061
                                                     kvp.first),
2062
                                                 value_frag.startswith("\"")
66✔
2063
                                                     ? value_kind_t::VALUE_JSON
22✔
2064
                                                     : value_kind_t::VALUE_TEXT,
22✔
2065
                                                 logline_value_meta::
2066
                                                     table_column{0},
2067
                                                 (log_format*) this}
2068
                                  .with_struct_name(FIELDS_NAME);
22✔
2069
                        values.lvv_values.emplace_back(lvm, value_frag);
44✔
2070
                    }
22✔
2071

22✔
2072
                    return false;
22✔
2073
                },
2074
                [line_number, &sbr](const logfmt::parser::error& err) {
40✔
2075
                    log_error("bad line %.*s", sbr.length(), sbr.get_data());
2076
                    log_error("%lld:logfmt parse error: %s",
×
2077
                              line_number,
×
2078
                              err.e_msg.c_str());
×
2079
                    return true;
2080
                });
2081
        }
×
2082

2083
        log_format::annotate(lf, line_number, sa, values, annotate_module);
46✔
2084
    }
2085

6✔
2086
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
2087
    {
2088
        auto retval = std::make_shared<logfmt_format>(*this);
3✔
2089

2090
        retval->lf_specialized = true;
3✔
2091
        return retval;
2092
    }
3✔
2093
};
6✔
2094

3✔
2095
static auto format_binder = injector::bind_multiple<log_format>()
2096
                                .add<logfmt_format>()
2097
                                .add<bro_log_format>()
2098
                                .add<w3c_log_format>()
2099
                                .add<generic_log_format>()
2100
                                .add<piper_log_format>();
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc