• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 17589970077-2502

09 Sep 2025 05:00PM UTC coverage: 65.196% (-5.0%) from 70.225%
17589970077-2502

push

github

tstack
[format] add fields for source file/line

Knowing the source file/line context in a log
message can help find log messages when using
log2src.

56 of 70 new or added lines in 2 files covered. (80.0%)

13954 existing lines in 210 files now uncovered.

45516 of 69814 relevant lines covered (65.2%)

404154.37 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.75
/src/log_format_impls.cc
1
/**
2
 * Copyright (c) 2007-2017, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file log_format_impls.cc
30
 */
31

32
#include <algorithm>
33
#include <chrono>
34
#include <memory>
35
#include <utility>
36

37
#include "log_format.hh"
38

39
#include <stdio.h>
40

41
#include "base/injector.bind.hh"
42
#include "base/opt_util.hh"
43
#include "config.h"
44
#include "formats/logfmt/logfmt.parser.hh"
45
#include "log_vtab_impl.hh"
46
#include "ptimec.hh"
47
#include "scn/scan.h"
48
#include "sql_util.hh"
49
#include "yajlpp/yajlpp.hh"
50

51
class piper_log_format : public log_format {
52
public:
53
    const intern_string_t get_name() const override
12,957✔
54
    {
55
        static const intern_string_t RETVAL
56
            = intern_string::lookup("lnav_piper_log");
14,343✔
57

58
        return RETVAL;
12,957✔
59
    }
60

61
    scan_result_t scan(logfile& lf,
10,417✔
62
                       std::vector<logline>& dst,
63
                       const line_info& li,
64
                       shared_buffer_ref& sbr,
65
                       scan_batch_context& sbc) override
66
    {
67
        if (lf.has_line_metadata()
10,417✔
68
            && lf.get_text_format() == text_format_t::TF_LOG)
10,417✔
69
        {
70
            dst.emplace_back(
213✔
71
                li.li_file_range.fr_offset, li.li_timestamp, li.li_level);
213✔
72
            return scan_match{1};
213✔
73
        }
74

75
        return scan_no_match{"not a piper capture"};
10,204✔
76
    }
77

78
    static constexpr int TIMESTAMP_SIZE = 28;
79

80
    void annotate(logfile* lf,
41✔
81
                  uint64_t line_number,
82
                  string_attrs_t& sa,
83
                  logline_value_vector& values,
84
                  bool annotate_module) const override
85
    {
86
        auto lr = line_range{0, TIMESTAMP_SIZE};
41✔
87
        sa.emplace_back(lr, L_TIMESTAMP.value());
41✔
88
        log_format::annotate(lf, line_number, sa, values, annotate_module);
41✔
89
    }
41✔
90

91
    void get_subline(const logline& ll,
317✔
92
                     shared_buffer_ref& sbr,
93
                     subline_options opts) override
94
    {
95
        this->plf_cached_line.resize(TIMESTAMP_SIZE);
317✔
96
        auto tlen = sql_strftime(this->plf_cached_line.data(),
317✔
97
                                 this->plf_cached_line.size(),
98
                                 ll.get_timeval(),
317✔
99
                                 'T');
100
        this->plf_cached_line.resize(tlen);
317✔
101
        {
102
            char zone_str[16];
103
            exttm tmptm;
317✔
104

105
            tmptm.et_flags |= ETF_ZONE_SET;
317✔
106
            tmptm.et_gmtoff
107
                = lnav::local_time_to_info(
634✔
108
                      date::local_seconds{ll.get_time<std::chrono::seconds>()})
317✔
109
                      .first.offset.count();
317✔
110
            off_t zone_len = 0;
317✔
111
            ftime_z(zone_str, zone_len, sizeof(zone_str), tmptm);
317✔
112
            for (off_t lpc = 0; lpc < zone_len; lpc++) {
1,902✔
113
                this->plf_cached_line.push_back(zone_str[lpc]);
1,585✔
114
            }
115
        }
116
        this->plf_cached_line.push_back(' ');
317✔
117
        const auto prefix_len = this->plf_cached_line.size();
317✔
118
        this->plf_cached_line.resize(this->plf_cached_line.size()
634✔
119
                                     + sbr.length());
317✔
120
        memcpy(
317✔
121
            &this->plf_cached_line[prefix_len], sbr.get_data(), sbr.length());
317✔
122

123
        sbr.share(this->plf_share_manager,
634✔
124
                  this->plf_cached_line.data(),
317✔
125
                  this->plf_cached_line.size());
126
    }
317✔
127

128
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
129
    {
130
        auto retval = std::make_shared<piper_log_format>(*this);
6✔
131

132
        retval->lf_specialized = true;
6✔
133
        retval->lf_timestamp_flags |= ETF_ZONE_SET | ETF_MICROS_SET;
6✔
134
        return retval;
12✔
135
    }
6✔
136

137
private:
138
    shared_buffer plf_share_manager;
139
    std::vector<char> plf_cached_line;
140
};
141

142
class generic_log_format : public log_format {
143
public:
144
    static const pcre_format* get_pcre_log_formats()
10,458✔
145
    {
146
        static const pcre_format log_fmt[] = {
147
            pcre_format(
148
                "^(?:\\*\\*\\*\\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24})(.*)"),
149
            pcre_format(
150
                R"(^(?:\*\*\*\s+)?(?<timestamp>(?:\s|\d{4}[\-\/]\d{2}[\-\/]\d{2}|T|\d{1,2}:\d{2}(?::\d{2}(?:[\.,]\d{1,6})?)?|Z|[+\-]\d{2}:?\d{2}|(?!DBG|ERR|INFO|WARN|NONE)[A-Z]{3,4})+)(?:\s+|[:|])([^:]+))"),
151
            pcre_format(
152
                "^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:+/\\.-]+) \\[\\w (.*)"),
153
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:,/\\.-]+) (.*)"),
154
            pcre_format(
155
                "^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w:,/\\.-]+) - (.*)"),
156
            pcre_format(
157
                "^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: \\.,/-]+) - (.*)"),
158
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: "
159
                        "\\.,/-]+)\\[[^\\]]+\\](.*)"),
160
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: \\.,/-]+) (.*)"),
161

162
            pcre_format(
163
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w: \.,+/-]+)\]\s*(\w+):?)"),
164
            pcre_format(
165
                "^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: \\.,+/-]+)\\] (.*)"),
166
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: "
167
                        "\\.,+/-]+)\\] \\[(\\w+)\\]"),
168
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: "
169
                        "\\.,+/-]+)\\] \\w+ (.*)"),
170
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: ,+/-]+)\\] "
171
                        "\\(\\d+\\) (.*)"),
172

173
            pcre_format(),
174
        };
10,458✔
175

176
        return log_fmt;
10,458✔
177
    }
178

UNCOV
179
    std::string get_pattern_regex(uint64_t line_number) const override
×
180
    {
UNCOV
181
        int pat_index = this->pattern_index_for_line(line_number);
×
UNCOV
182
        return get_pcre_log_formats()[pat_index].name;
×
183
    }
184

185
    const intern_string_t get_name() const override
12,626✔
186
    {
187
        static const intern_string_t RETVAL
188
            = intern_string::lookup("generic_log");
14,012✔
189

190
        return RETVAL;
12,626✔
191
    }
192

193
    scan_result_t scan(logfile& lf,
10,380✔
194
                       std::vector<logline>& dst,
195
                       const line_info& li,
196
                       shared_buffer_ref& sbr,
197
                       scan_batch_context& sbc) override
198
    {
199
        exttm log_time;
10,380✔
200
        timeval log_tv;
201
        string_fragment ts;
10,380✔
202
        std::optional<string_fragment> level;
10,380✔
203
        const char* last_pos;
204

205
        if (dst.empty()) {
10,380✔
206
            auto file_options = lf.get_file_options();
186✔
207

208
            if (file_options) {
186✔
209
                this->lf_date_time.dts_default_zone
210
                    = file_options->second.fo_default_zone.pp_value;
2✔
211
            } else {
212
                this->lf_date_time.dts_default_zone = nullptr;
184✔
213
            }
214
        }
186✔
215

216
        if ((last_pos = this->log_scanf(dst.size(),
10,380✔
217
                                        sbr.to_string_fragment(),
218
                                        get_pcre_log_formats(),
219
                                        nullptr,
220
                                        &log_time,
221
                                        &log_tv,
222

223
                                        &ts,
224
                                        &level))
225
            != nullptr)
10,380✔
226
        {
227
            auto level_val = log_level_t::LEVEL_UNKNOWN;
1,489✔
228
            if (level) {
1,489✔
229
                level_val = string2level(level->data(), level->length());
1,489✔
230
            }
231

232
            if (!((log_time.et_flags & ETF_DAY_SET)
1,489✔
233
                  && (log_time.et_flags & ETF_MONTH_SET)
1,414✔
234
                  && (log_time.et_flags & ETF_YEAR_SET)))
1,414✔
235
            {
236
                this->check_for_new_year(dst, log_time, log_tv);
676✔
237
            }
238

239
            if (!(this->lf_timestamp_flags
2,978✔
240
                  & (ETF_MILLIS_SET | ETF_MICROS_SET | ETF_NANOS_SET))
1,489✔
241
                && !dst.empty()
1,138✔
242
                && dst.back().get_time<std::chrono::seconds>().count()
1,136✔
243
                    == log_tv.tv_sec
1,136✔
244
                && dst.back()
3,695✔
245
                        .get_subsecond_time<std::chrono::microseconds>()
2,557✔
246
                        .count()
1,068✔
247
                    != 0)
248
            {
249
                auto log_ms
250
                    = dst.back()
×
UNCOV
251
                          .get_subsecond_time<std::chrono::microseconds>();
×
252

253
                log_time.et_nsec
UNCOV
254
                    = std::chrono::duration_cast<std::chrono::nanoseconds>(
×
255
                          log_ms)
UNCOV
256
                          .count();
×
257
                log_tv.tv_usec
UNCOV
258
                    = std::chrono::duration_cast<std::chrono::microseconds>(
×
259
                          log_ms)
UNCOV
260
                          .count();
×
261
            }
262

263
            dst.emplace_back(li.li_file_range.fr_offset, log_tv, level_val);
1,489✔
264
            return scan_match{5};
1,489✔
265
        }
266

267
        return scan_no_match{"no patterns matched"};
8,891✔
268
    }
269

270
    void annotate(logfile* lf,
78✔
271
                  uint64_t line_number,
272
                  string_attrs_t& sa,
273
                  logline_value_vector& values,
274
                  bool annotate_module) const override
275
    {
276
        auto& line = values.lvv_sbr;
78✔
277
        int pat_index = this->pattern_index_for_line(line_number);
78✔
278
        const auto& fmt = get_pcre_log_formats()[pat_index];
78✔
279
        int prefix_len = 0;
78✔
280
        auto md = fmt.pcre->create_match_data();
78✔
281
        auto match_res = fmt.pcre->capture_from(line.to_string_fragment())
78✔
282
                             .into(md)
78✔
283
                             .matches(PCRE2_NO_UTF_CHECK)
156✔
284
                             .ignore_error();
78✔
285
        if (!match_res) {
78✔
286
            return;
2✔
287
        }
288

289
        auto ts_cap = md[fmt.pf_timestamp_index].value();
76✔
290
        auto lr = to_line_range(ts_cap.trim());
76✔
291
        sa.emplace_back(lr, L_TIMESTAMP.value());
76✔
292

293
        values.lvv_values.emplace_back(TS_META, line, lr);
76✔
294
        values.lvv_values.back().lv_meta.lvm_format = (log_format*) this;
76✔
295

296
        prefix_len = ts_cap.sf_end;
76✔
297
        auto level_cap = md[2];
76✔
298
        if (level_cap) {
76✔
299
            if (string2level(level_cap->data(), level_cap->length(), true)
76✔
300
                != LEVEL_UNKNOWN)
76✔
301
            {
302
                prefix_len = level_cap->sf_end;
65✔
303

304
                values.lvv_values.emplace_back(
65✔
305
                    LEVEL_META, line, to_line_range(level_cap->trim()));
65✔
306
                values.lvv_values.back().lv_meta.lvm_format
65✔
307
                    = (log_format*) this;
65✔
308

309
                lr = to_line_range(level_cap->trim());
65✔
310
                if (lr.lr_end != (ssize_t) line.length()) {
65✔
311
                    sa.emplace_back(lr, L_LEVEL.value());
65✔
312
                }
313
            }
314
        }
315

316
        lr.lr_start = 0;
76✔
317
        lr.lr_end = prefix_len;
76✔
318
        sa.emplace_back(lr, L_PREFIX.value());
76✔
319

320
        lr.lr_start = prefix_len;
76✔
321
        lr.lr_end = line.length();
76✔
322
        sa.emplace_back(lr, SA_BODY.value());
76✔
323

324
        log_format::annotate(lf, line_number, sa, values, annotate_module);
76✔
325
    }
78✔
326

327
    std::shared_ptr<log_format> specialized(int fmt_lock) override
48✔
328
    {
329
        auto retval = std::make_shared<generic_log_format>(*this);
48✔
330

331
        retval->lf_specialized = true;
48✔
332
        return retval;
96✔
333
    }
48✔
334

335
    bool hide_field(const intern_string_t field_name, bool val) override
2✔
336
    {
337
        if (field_name == TS_META.lvm_name) {
2✔
338
            TS_META.lvm_user_hidden = val;
1✔
339
            return true;
1✔
340
        }
341
        if (field_name == LEVEL_META.lvm_name) {
1✔
342
            LEVEL_META.lvm_user_hidden = val;
1✔
343
            return true;
1✔
344
        }
UNCOV
345
        if (field_name == OPID_META.lvm_name) {
×
UNCOV
346
            OPID_META.lvm_user_hidden = val;
×
UNCOV
347
            return true;
×
348
        }
UNCOV
349
        return false;
×
350
    }
351

352
    std::map<intern_string_t, logline_value_meta> get_field_states() override
119✔
353
    {
354
        return {
355
            {TS_META.lvm_name, TS_META},
356
            {LEVEL_META.lvm_name, LEVEL_META},
357
            {OPID_META.lvm_name, OPID_META},
358
        };
595✔
359
    }
119✔
360

361
private:
362
    static logline_value_meta TS_META;
363
    static logline_value_meta LEVEL_META;
364
    static logline_value_meta OPID_META;
365
};
366

367
// Metadata for the "log_time" field: a text value in table column 2.
logline_value_meta generic_log_format::TS_META{
    intern_string::lookup("log_time"), value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{2}};

// Metadata for the "log_level" field: a text value in table column 3.
logline_value_meta generic_log_format::LEVEL_META{
    intern_string::lookup("log_level"), value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{3}};

// Metadata for the "log_opid" field: a text value in an internal column.
logline_value_meta generic_log_format::OPID_META{
    intern_string::lookup("log_opid"), value_kind_t::VALUE_TEXT,
    logline_value_meta::internal_column{}};
384

385
/**
 * Decode "\xHH" hexadecimal escapes in a length-delimited string.
 *
 * A backslash followed by 'x' and exactly two hexadecimal digits is replaced
 * by the byte with that value.  Any other backslash is dropped and the
 * characters after it are copied through unchanged.  The input does not need
 * to be NUL-terminated; no byte at or beyond `str + len` is read.
 *
 * @param str The bytes to decode.
 * @param len The number of bytes in `str`.
 * @return The decoded string.
 */
std::string
from_escaped_string(const char* str, size_t len)
{
    // Value of a hexadecimal digit, or -1 if `ch` is not one.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    std::string retval;

    for (size_t lpc = 0; lpc < len; lpc++) {
        if (str[lpc] != '\\') {
            retval.append(1, str[lpc]);
            continue;
        }

        // Need four bytes in total: '\\', 'x' and two digits.
        if ((lpc + 3) < len && str[lpc + 1] == 'x') {
            const int hi = hex_value(str[lpc + 2]);
            const int lo = hex_value(str[lpc + 3]);

            if (hi >= 0 && lo >= 0) {
                retval.append(1, static_cast<char>((hi << 4) | lo));
                // Skip the 'x' and both digits; the loop skips the rest.
                lpc += 3;
            }
        }
        // Otherwise only the backslash itself is consumed.
    }

    return retval;
}
×
410

411
/**
 * Locate the first occurrence of the NUL-terminated string `find` within the
 * first `slen` bytes of `s`.  The search also stops at a NUL byte in `s`.
 *
 * @param s The text to search.
 * @param find The NUL-terminated string to look for.
 * @param slen The maximum number of bytes of `s` to examine.
 * @return A pointer to the start of the match, `s` itself when `find` is
 * empty, or std::nullopt when there is no match.
 */
std::optional<const char*>
lnav_strnstr(const char* s, const char* find, size_t slen)
{
    const char first = *find;

    if (first == '\0') {
        return s;
    }

    const char* tail = find + 1;
    const size_t tail_len = strlen(tail);

    while (slen > 0 && *s != '\0') {
        const char curr = *s;

        ++s;
        --slen;
        if (curr != first) {
            continue;
        }
        // The rest of the needle cannot fit in what remains.
        if (tail_len > slen) {
            return std::nullopt;
        }
        if (strncmp(s, tail, tail_len) == 0) {
            // `s` is one past the first matching byte.
            return s - 1;
        }
    }

    return std::nullopt;
}
435

436
struct separated_string {
437
    const char* ss_str;
438
    size_t ss_len;
439
    const char* ss_separator;
440
    size_t ss_separator_len;
441

442
    separated_string(const char* str, size_t len)
34,035✔
443
        : ss_str(str), ss_len(len), ss_separator(","),
34,035✔
444
          ss_separator_len(strlen(this->ss_separator))
34,035✔
445
    {
446
    }
34,035✔
447

448
    separated_string& with_separator(const char* sep)
34,035✔
449
    {
450
        this->ss_separator = sep;
34,035✔
451
        this->ss_separator_len = strlen(sep);
34,035✔
452
        return *this;
34,035✔
453
    }
454

455
    struct iterator {
456
        const separated_string& i_parent;
457
        const char* i_pos;
458
        const char* i_next_pos;
459
        size_t i_index;
460

461
        iterator(const separated_string& ss, const char* pos)
820,185✔
462
            : i_parent(ss), i_pos(pos), i_next_pos(pos), i_index(0)
820,185✔
463
        {
464
            this->update();
820,185✔
465
        }
820,185✔
466

467
        void update()
1,572,454✔
468
        {
469
            const separated_string& ss = this->i_parent;
1,572,454✔
470
            auto next_field
471
                = lnav_strnstr(this->i_pos,
1,572,454✔
472
                               ss.ss_separator,
1,572,454✔
473
                               ss.ss_len - (this->i_pos - ss.ss_str));
1,572,454✔
474
            if (next_field) {
1,572,454✔
475
                this->i_next_pos = next_field.value() + ss.ss_separator_len;
718,592✔
476
            } else {
477
                this->i_next_pos = ss.ss_str + ss.ss_len;
853,862✔
478
            }
479
        }
1,572,454✔
480

481
        iterator& operator++()
752,269✔
482
        {
483
            this->i_pos = this->i_next_pos;
752,269✔
484
            this->update();
752,269✔
485
            this->i_index += 1;
752,269✔
486

487
            return *this;
752,269✔
488
        }
489

490
        string_fragment operator*()
675,465✔
491
        {
492
            const auto& ss = this->i_parent;
675,465✔
493
            int end;
494

495
            if (this->i_next_pos < (ss.ss_str + ss.ss_len)) {
675,465✔
496
                end = this->i_next_pos - ss.ss_str - ss.ss_separator_len;
645,824✔
497
            } else {
498
                end = this->i_next_pos - ss.ss_str;
29,641✔
499
            }
500
            return string_fragment::from_byte_range(
675,465✔
501
                ss.ss_str, this->i_pos - ss.ss_str, end);
675,465✔
502
        }
503

504
        bool operator==(const iterator& other) const
786,150✔
505
        {
506
            return (&this->i_parent == &other.i_parent)
786,150✔
507
                && (this->i_pos == other.i_pos);
786,150✔
508
        }
509

510
        bool operator!=(const iterator& other) const
785,996✔
511
        {
512
            return !(*this == other);
785,996✔
513
        }
514

515
        size_t index() const { return this->i_index; }
1,621,336✔
516
    };
517

518
    iterator begin() { return {*this, this->ss_str}; }
34,035✔
519

520
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
786,150✔
521
};
522

523
class bro_log_format : public log_format {
524
public:
525
    static const intern_string_t TS;
526
    struct field_def {
527
        logline_value_meta fd_meta;
528
        logline_value_meta* fd_root_meta;
529
        std::string fd_collator;
530
        std::optional<size_t> fd_numeric_index;
531

532
        explicit field_def(const intern_string_t name,
622✔
533
                           size_t col,
534
                           log_format* format)
535
            : fd_meta(name,
1,244✔
536
                      value_kind_t::VALUE_TEXT,
537
                      logline_value_meta::table_column{col},
622✔
538
                      format),
539
              fd_root_meta(&FIELD_META.find(name)->second)
622✔
540
        {
541
        }
622✔
542

543
        field_def& with_kind(value_kind_t kind,
458✔
544
                             bool identifier = false,
545
                             bool foreign_key = false,
546
                             const std::string& collator = "")
547
        {
548
            this->fd_meta.lvm_kind = kind;
458✔
549
            this->fd_meta.lvm_identifier = identifier;
458✔
550
            this->fd_meta.lvm_foreign_key = foreign_key;
458✔
551
            this->fd_collator = collator;
458✔
552
            return *this;
458✔
553
        }
554

555
        field_def& with_numeric_index(size_t index)
116✔
556
        {
557
            this->fd_numeric_index = index;
116✔
558
            return *this;
116✔
559
        }
560
    };
561

562
    static std::unordered_map<const intern_string_t, logline_value_meta>
563
        FIELD_META;
564

565
    static const intern_string_t get_opid_desc()
2,565✔
566
    {
567
        static const intern_string_t RETVAL = intern_string::lookup("std");
3,963✔
568

569
        return RETVAL;
2,565✔
570
    }
571

572
    bro_log_format()
699✔
573
    {
699✔
574
        this->lf_structured = true;
699✔
575
        this->lf_is_self_describing = true;
699✔
576
        this->lf_time_ordered = false;
699✔
577

578
        auto desc_v = std::make_shared<std::vector<opid_descriptor>>();
699✔
579
        desc_v->emplace({});
699✔
580
        this->lf_opid_description_def->emplace(get_opid_desc(),
1,398✔
581
                                               opid_descriptors{desc_v});
1,398✔
582
    }
699✔
583

584
    const intern_string_t get_name() const override
113,977✔
585
    {
586
        static const intern_string_t name(intern_string::lookup("bro"));
115,363✔
587

588
        return this->blf_format_name.empty() ? name : this->blf_format_name;
113,977✔
589
    }
590

591
    void clear() override
10,439✔
592
    {
593
        this->log_format::clear();
10,439✔
594
        this->blf_format_name.clear();
10,439✔
595
        this->blf_field_defs.clear();
10,439✔
596
    }
10,439✔
597

598
    scan_result_t scan_int(std::vector<logline>& dst,
4,168✔
599
                           const line_info& li,
600
                           shared_buffer_ref& sbr,
601
                           scan_batch_context& sbc)
602
    {
603
        static const intern_string_t STATUS_CODE
604
            = intern_string::lookup("bro_status_code");
4,210✔
605
        static const intern_string_t UID = intern_string::lookup("bro_uid");
4,210✔
606
        static const intern_string_t ID_ORIG_H
607
            = intern_string::lookup("bro_id_orig_h");
4,210✔
608

609
        separated_string ss(sbr.get_data(), sbr.length());
4,168✔
610
        struct timeval tv;
611
        struct exttm tm;
4,168✔
612
        bool found_ts = false;
4,168✔
613
        log_level_t level = LEVEL_INFO;
4,168✔
614
        uint8_t opid = 0;
4,168✔
615
        auto opid_cap = string_fragment::invalid();
4,168✔
616
        auto host_cap = string_fragment::invalid();
4,168✔
617

618
        ss.with_separator(this->blf_separator.get());
4,168✔
619

620
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
122,914✔
621
            if (iter.index() == 0 && *iter == "#close") {
118,768✔
622
                return scan_match{2000};
22✔
623
            }
624

625
            if (iter.index() >= this->blf_field_defs.size()) {
118,746✔
UNCOV
626
                break;
×
627
            }
628

629
            const auto& fd = this->blf_field_defs[iter.index()];
118,746✔
630

631
            if (TS == fd.fd_meta.lvm_name) {
118,746✔
632
                static const char* const TIME_FMT[] = {"%s.%f"};
633
                const auto sf = *iter;
4,146✔
634

635
                if (this->lf_date_time.scan(
4,146✔
636
                        sf.data(), sf.length(), TIME_FMT, &tm, tv))
4,146✔
637
                {
638
                    this->lf_timestamp_flags = tm.et_flags;
4,146✔
639
                    found_ts = true;
4,146✔
640
                }
641
            } else if (STATUS_CODE == fd.fd_meta.lvm_name) {
114,600✔
642
                const auto sf = *iter;
3,960✔
643

644
                if (!sf.empty() && sf[0] >= '4') {
3,960✔
645
                    level = LEVEL_ERROR;
20✔
646
                }
647
            } else if (UID == fd.fd_meta.lvm_name) {
110,640✔
648
                opid_cap = *iter;
4,146✔
649

650
                opid = hash_str(opid_cap.data(), opid_cap.length());
4,146✔
651
            } else if (ID_ORIG_H == fd.fd_meta.lvm_name) {
106,494✔
652
                host_cap = *iter;
4,146✔
653
            }
654

655
            if (fd.fd_numeric_index) {
118,746✔
656
                switch (fd.fd_meta.lvm_kind) {
21,288✔
657
                    case value_kind_t::VALUE_INTEGER:
21,288✔
658
                    case value_kind_t::VALUE_FLOAT: {
659
                        const auto sv = (*iter).to_string_view();
21,288✔
660
                        auto scan_float_res = scn::scan_value<double>(sv);
21,288✔
661
                        if (scan_float_res) {
21,288✔
662
                            this->lf_value_stats[fd.fd_numeric_index.value()]
17,328✔
663
                                .add_value(scan_float_res->value());
17,328✔
664
                        }
665
                        break;
21,288✔
666
                    }
667
                    default:
×
UNCOV
668
                        break;
×
669
                }
670
            }
671
        }
672

673
        if (found_ts) {
4,146✔
674
            if (!this->lf_specialized) {
4,146✔
675
                for (auto& ll : dst) {
198✔
676
                    ll.set_ignore(true);
176✔
677
                }
678
            }
679

680
            if (opid_cap.is_valid()) {
4,146✔
681
                auto opid_iter
682
                    = sbc.sbc_opids.insert_op(sbc.sbc_allocator, opid_cap, tv);
4,146✔
683
                opid_iter->second.otr_level_stats.update_msg_count(level);
4,146✔
684

685
                auto& otr = opid_iter->second;
4,146✔
686
                if (!otr.otr_description.lod_id && host_cap.is_valid()
6,012✔
687
                    && otr.otr_description.lod_elements.empty())
6,012✔
688
                {
689
                    otr.otr_description.lod_id = get_opid_desc();
1,866✔
690
                    otr.otr_description.lod_elements.emplace_back(
3,732✔
691
                        0, host_cap.to_string());
1,866✔
692
                }
693
            }
694
            dst.emplace_back(li.li_file_range.fr_offset, tv, level, 0, opid);
4,146✔
695
            return scan_match{2000};
4,146✔
696
        }
UNCOV
697
        return scan_no_match{"no header found"};
×
698
    }
699

700
    scan_result_t scan(logfile& lf,
10,417✔
701
                       std::vector<logline>& dst,
702
                       const line_info& li,
703
                       shared_buffer_ref& sbr,
704
                       scan_batch_context& sbc) override
705
    {
706
        static const auto SEP_RE
707
            = lnav::pcre2pp::code::from_const(R"(^#separator\s+(.+))");
10,417✔
708

709
        if (dst.empty()) {
10,417✔
710
            auto file_options = lf.get_file_options();
1,070✔
711

712
            if (file_options) {
1,070✔
713
                this->lf_date_time.dts_default_zone
714
                    = file_options->second.fo_default_zone.pp_value;
53✔
715
            } else {
716
                this->lf_date_time.dts_default_zone = nullptr;
1,017✔
717
            }
718
        }
1,070✔
719

720
        if (!this->blf_format_name.empty()) {
10,417✔
721
            return this->scan_int(dst, li, sbr, sbc);
4,146✔
722
        }
723

724
        if (dst.empty() || dst.size() > 20 || sbr.empty()
11,472✔
725
            || sbr.get_data()[0] == '#')
11,472✔
726
        {
727
            return scan_no_match{"no header found"};
3,962✔
728
        }
729

730
        auto line_iter = dst.begin();
2,309✔
731
        auto read_result = lf.read_line(line_iter);
2,309✔
732

733
        if (read_result.isErr()) {
2,309✔
UNCOV
734
            return scan_no_match{"unable to read first line"};
×
735
        }
736

737
        auto line = read_result.unwrap();
2,309✔
738
        auto md = SEP_RE.create_match_data();
2,309✔
739

740
        auto match_res = SEP_RE.capture_from(line.to_string_fragment())
2,309✔
741
                             .into(md)
2,309✔
742
                             .matches(PCRE2_NO_UTF_CHECK)
4,618✔
743
                             .ignore_error();
2,309✔
744
        if (!match_res) {
2,309✔
745
            return scan_no_match{"cannot read separator header"};
2,287✔
746
        }
747

748
        this->clear();
22✔
749

750
        auto sep = from_escaped_string(md[1]->data(), md[1]->length());
22✔
751
        this->blf_separator = intern_string::lookup(sep);
22✔
752

753
        for (++line_iter; line_iter != dst.end(); ++line_iter) {
176✔
754
            auto next_read_result = lf.read_line(line_iter);
154✔
755

756
            if (next_read_result.isErr()) {
154✔
UNCOV
757
                return scan_no_match{"unable to read header line"};
×
758
            }
759

760
            line = next_read_result.unwrap();
154✔
761
            separated_string ss(line.get_data(), line.length());
154✔
762

763
            ss.with_separator(this->blf_separator.get());
154✔
764
            auto iter = ss.begin();
154✔
765

766
            string_fragment directive = *iter;
154✔
767

768
            if (directive.empty() || directive[0] != '#') {
154✔
UNCOV
769
                continue;
×
770
            }
771

772
            ++iter;
154✔
773
            if (iter == ss.end()) {
154✔
UNCOV
774
                continue;
×
775
            }
776

777
            if (directive == "#set_separator") {
154✔
778
                this->blf_set_separator = intern_string::lookup(*iter);
22✔
779
            } else if (directive == "#empty_field") {
132✔
780
                this->blf_empty_field = intern_string::lookup(*iter);
22✔
781
            } else if (directive == "#unset_field") {
110✔
782
                this->blf_unset_field = intern_string::lookup(*iter);
22✔
783
            } else if (directive == "#path") {
88✔
784
                auto full_name = fmt::format(FMT_STRING("bro_{}_log"), *iter);
66✔
785
                this->blf_format_name = intern_string::lookup(full_name);
22✔
786
            } else if (directive == "#fields" && this->blf_field_defs.empty()) {
88✔
787
                do {
788
                    auto field_name
789
                        = intern_string::lookup("bro_" + sql_safe_ident(*iter));
622✔
790
                    auto common_iter = FIELD_META.find(field_name);
622✔
791
                    if (common_iter == FIELD_META.end()) {
622✔
792
                        FIELD_META.emplace(field_name,
616✔
793
                                           logline_value_meta{
1,232✔
794
                                               field_name,
795
                                               value_kind_t::VALUE_TEXT,
796
                                           });
797
                    }
798
                    this->blf_field_defs.emplace_back(
1,244✔
799
                        field_name, this->blf_field_defs.size(), this);
622✔
800
                    ++iter;
622✔
801
                } while (iter != ss.end());
622✔
802
            } else if (directive == "#types") {
44✔
803
                static const char* KNOWN_IDS[] = {
804
                    "bro_conn_uids",
805
                    "bro_fuid",
806
                    "bro_host",
807
                    "bro_info_code",
808
                    "bro_method",
809
                    "bro_mime_type",
810
                    "bro_orig_fuids",
811
                    "bro_parent_fuid",
812
                    "bro_proto",
813
                    "bro_referrer",
814
                    "bro_resp_fuids",
815
                    "bro_service",
816
                    "bro_uid",
817
                    "bro_uri",
818
                    "bro_user_agent",
819
                    "bro_username",
820
                };
821
                static const char* KNOWN_FOREIGN[] = {
822
                    "bro_status_code",
823
                };
824

825
                int numeric_count = 0;
22✔
826

827
                do {
828
                    string_fragment field_type = *iter;
622✔
829
                    auto& fd = this->blf_field_defs[iter.index() - 1];
622✔
830

831
                    if (field_type == "time") {
622✔
832
                        fd.with_kind(value_kind_t::VALUE_TIMESTAMP);
44✔
833
                    } else if (field_type == "string") {
600✔
834
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
456✔
835
                                                        std::end(KNOWN_IDS),
836
                                                        fd.fd_meta.lvm_name);
228✔
837
                        fd.with_kind(value_kind_t::VALUE_TEXT, ident);
456✔
838
                    } else if (field_type == "count") {
372✔
839
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
228✔
840
                                                        std::end(KNOWN_IDS),
841
                                                        fd.fd_meta.lvm_name);
114✔
842
                        bool foreign
843
                            = std::binary_search(std::begin(KNOWN_FOREIGN),
228✔
844
                                                 std::end(KNOWN_FOREIGN),
845
                                                 fd.fd_meta.lvm_name);
114✔
846
                        fd.with_kind(
228✔
847
                              value_kind_t::VALUE_INTEGER, ident, foreign)
848
                            .with_numeric_index(numeric_count);
114✔
849
                        numeric_count += 1;
114✔
850
                    } else if (field_type == "bool") {
258✔
851
                        fd.with_kind(value_kind_t::VALUE_BOOLEAN);
8✔
852
                    } else if (field_type == "addr") {
254✔
853
                        fd.with_kind(
88✔
854
                            value_kind_t::VALUE_TEXT, true, false, "ipaddress");
855
                    } else if (field_type == "port") {
210✔
856
                        fd.with_kind(value_kind_t::VALUE_INTEGER, true);
88✔
857
                    } else if (field_type == "interval") {
166✔
858
                        fd.with_kind(value_kind_t::VALUE_FLOAT)
4✔
859
                            .with_numeric_index(numeric_count);
2✔
860
                        numeric_count += 1;
2✔
861
                    }
862

863
                    ++iter;
622✔
864
                } while (iter != ss.end());
622✔
865

866
                this->lf_value_stats.resize(numeric_count);
22✔
867
            }
868
        }
154✔
869

870
        if (!this->blf_format_name.empty() && !this->blf_separator.empty()
44✔
871
            && !this->blf_field_defs.empty())
44✔
872
        {
873
            return this->scan_int(dst, li, sbr, sbc);
22✔
874
        }
875

UNCOV
876
        this->blf_format_name.clear();
×
UNCOV
877
        this->lf_value_stats.clear();
×
878

UNCOV
879
        return scan_no_match{"no header found"};
×
880
    }
2,309✔
881

882
    void annotate(logfile* lf,
29,713✔
883
                  uint64_t line_number,
884
                  string_attrs_t& sa,
885
                  logline_value_vector& values,
886
                  bool annotate_module) const override
887
    {
888
        static const intern_string_t UID = intern_string::lookup("bro_uid");
29,737✔
889

890
        auto& sbr = values.lvv_sbr;
29,713✔
891
        separated_string ss(sbr.get_data(), sbr.length());
29,713✔
892

893
        ss.with_separator(this->blf_separator.get());
29,713✔
894

895
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
661,838✔
896
            if (iter.index() >= this->blf_field_defs.size()) {
632,329✔
897
                return;
204✔
898
            }
899

900
            const field_def& fd = this->blf_field_defs[iter.index()];
632,125✔
901
            string_fragment sf = *iter;
632,125✔
902

903
            if (sf == this->blf_empty_field) {
632,125✔
904
                sf.clear();
29,516✔
905
            } else if (sf == this->blf_unset_field) {
602,609✔
906
                sf.invalidate();
68,206✔
907
            }
908

909
            auto lr = line_range(sf.sf_begin, sf.sf_end);
632,125✔
910

911
            if (fd.fd_meta.lvm_name == TS) {
632,125✔
912
                sa.emplace_back(lr, L_TIMESTAMP.value());
29,713✔
913
            } else if (fd.fd_meta.lvm_name == UID) {
602,412✔
914
                sa.emplace_back(lr, L_OPID.value());
29,713✔
915
            }
916

917
            if (lr.is_valid()) {
632,125✔
918
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
563,919✔
919
            } else {
920
                values.lvv_values.emplace_back(fd.fd_meta);
68,206✔
921
            }
922
            values.lvv_values.back().lv_meta.lvm_user_hidden
632,125✔
923
                = fd.fd_root_meta->lvm_user_hidden;
632,125✔
924
        }
925

926
        log_format::annotate(lf, line_number, sa, values, annotate_module);
29,509✔
927
    }
928

929
    /**
     * Look up the statistics slot for the field called `name`.
     *
     * @param name The field name to search for in blf_field_defs.
     * @return A pointer into lf_value_stats for the first field with that
     *   name, or nullptr if no field matches or the matching field has no
     *   numeric index.
     */
    const logline_value_stats* stats_for_value(
        const intern_string_t& name) const override
    {
        for (const auto& def : this->blf_field_defs) {
            if (def.fd_meta.lvm_name == name) {
                // Only the first match is considered.
                if (def.fd_numeric_index) {
                    return &this
                                ->lf_value_stats[def.fd_numeric_index.value()];
                }
                return nullptr;
            }
        }

        return nullptr;
    }
947

948
    /**
     * Set the user-hidden flag on a field's entry in FIELD_META.
     *
     * LOG_TIME_STR is treated as an alias for the TS field.
     *
     * @param field_name The name of the field to update.
     * @param val The new value for lvm_user_hidden.
     * @return true if the field was found in FIELD_META, false otherwise.
     */
    bool hide_field(intern_string_t field_name, bool val) override
    {
        const intern_string_t& key
            = (field_name == LOG_TIME_STR) ? TS : field_name;
        const auto found = FIELD_META.find(key);

        if (found != FIELD_META.end()) {
            found->second.lvm_user_hidden = val;
            return true;
        }

        return false;
    }
963

964
    std::map<intern_string_t, logline_value_meta> get_field_states() override
119✔
965
    {
966
        std::map<intern_string_t, logline_value_meta> retval;
119✔
967

968
        for (const auto& fd : FIELD_META) {
467✔
969
            retval.emplace(fd.first, fd.second);
348✔
970
        }
971

972
        return retval;
119✔
UNCOV
973
    }
×
974

975
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
22✔
976
    {
977
        auto retval = std::make_shared<bro_log_format>(*this);
22✔
978

979
        retval->lf_specialized = true;
22✔
980
        return retval;
44✔
981
    }
22✔
982

983
    class bro_log_table : public log_format_vtab_impl {
984
    public:
985
        explicit bro_log_table(const bro_log_format& format)
20✔
986
            : log_format_vtab_impl(format), blt_format(format)
20✔
987
        {
988
        }
20✔
989

990
        void get_columns(std::vector<vtab_column>& cols) const override
29✔
991
        {
992
            for (const auto& fd : this->blt_format.blf_field_defs) {
854✔
993
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
825✔
994
                    fd.fd_meta.lvm_kind);
825✔
995

996
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
825✔
997
                                  type_pair.first,
998
                                  fd.fd_collator,
825✔
999
                                  false,
1,650✔
1000
                                  "",
1001
                                  type_pair.second);
1002
            }
1003
        }
29✔
1004

1005
        void get_foreign_keys(
10✔
1006
            std::unordered_set<std::string>& keys_inout) const override
1007
        {
1008
            this->log_vtab_impl::get_foreign_keys(keys_inout);
10✔
1009

1010
            for (const auto& fd : this->blt_format.blf_field_defs) {
292✔
1011
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
282✔
1012
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
123✔
1013
                }
1014
            }
1015
        }
10✔
1016

1017
        const bro_log_format& blt_format;
1018
    };
1019

1020
    /**
     * @return The function-local static map from format name to the table
     *   created for it.
     */
    static std::map<intern_string_t, std::shared_ptr<bro_log_table>>&
    get_tables()
    {
        static std::map<intern_string_t, std::shared_ptr<bro_log_table>>
            TABLES_BY_NAME;

        return TABLES_BY_NAME;
    }
1027

1028
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
599✔
1029
    {
1030
        if (this->blf_format_name.empty()) {
599✔
1031
            return nullptr;
579✔
1032
        }
1033

1034
        std::shared_ptr<bro_log_table> retval = nullptr;
20✔
1035

1036
        auto& tables = get_tables();
20✔
1037
        const auto iter = tables.find(this->blf_format_name);
20✔
1038
        if (iter == tables.end()) {
20✔
1039
            retval = std::make_shared<bro_log_table>(*this);
20✔
1040
            tables[this->blf_format_name] = retval;
20✔
1041
        }
1042

1043
        return retval;
20✔
1044
    }
20✔
1045

1046
    void get_subline(const logline& ll,
33,721✔
1047
                     shared_buffer_ref& sbr,
1048
                     subline_options opts) override
1049
    {
1050
    }
33,721✔
1051

1052
    intern_string_t blf_format_name;
1053
    intern_string_t blf_separator;
1054
    intern_string_t blf_set_separator;
1055
    intern_string_t blf_empty_field;
1056
    intern_string_t blf_unset_field;
1057
    std::vector<field_def> blf_field_defs;
1058
};
1059

1060
std::unordered_map<const intern_string_t, logline_value_meta>
1061
    bro_log_format::FIELD_META;
1062

1063
const intern_string_t bro_log_format::TS = intern_string::lookup("bro_ts");
1064

1065
struct ws_separated_string {
1066
    const char* ss_str;
1067
    size_t ss_len;
1068

1069
    explicit ws_separated_string(const char* str = nullptr, size_t len = -1)
18,410✔
1070
        : ss_str(str), ss_len(len)
18,410✔
1071
    {
1072
    }
18,410✔
1073

1074
    struct iterator {
1075
        enum class state_t {
1076
            NORMAL,
1077
            QUOTED,
1078
        };
1079

1080
        const ws_separated_string& i_parent;
1081
        const char* i_pos;
1082
        const char* i_next_pos;
1083
        size_t i_index{0};
1084
        state_t i_state{state_t::NORMAL};
1085

1086
        iterator(const ws_separated_string& ss, const char* pos)
29,524✔
1087
            : i_parent(ss), i_pos(pos), i_next_pos(pos)
29,524✔
1088
        {
1089
            this->update();
29,524✔
1090
        }
29,524✔
1091

1092
        void update()
40,090✔
1093
        {
1094
            const auto& ss = this->i_parent;
40,090✔
1095
            bool done = false;
40,090✔
1096

1097
            while (!done && this->i_next_pos < (ss.ss_str + ss.ss_len)) {
318,729✔
1098
                switch (this->i_state) {
278,639✔
1099
                    case state_t::NORMAL:
271,881✔
1100
                        if (*this->i_next_pos == '"') {
271,881✔
1101
                            this->i_state = state_t::QUOTED;
255✔
1102
                        } else if (isspace(*this->i_next_pos)) {
271,626✔
1103
                            done = true;
24,719✔
1104
                        }
1105
                        break;
271,881✔
1106
                    case state_t::QUOTED:
6,758✔
1107
                        if (*this->i_next_pos == '"') {
6,758✔
1108
                            this->i_state = state_t::NORMAL;
255✔
1109
                        }
1110
                        break;
6,758✔
1111
                }
1112
                if (!done) {
278,639✔
1113
                    this->i_next_pos += 1;
253,920✔
1114
                }
1115
            }
1116
        }
40,090✔
1117

1118
        iterator& operator++()
10,566✔
1119
        {
1120
            const auto& ss = this->i_parent;
10,566✔
1121

1122
            this->i_pos = this->i_next_pos;
10,566✔
1123
            while (this->i_pos < (ss.ss_str + ss.ss_len)
10,566✔
1124
                   && isspace(*this->i_pos))
20,590✔
1125
            {
1126
                this->i_pos += 1;
10,024✔
1127
                this->i_next_pos += 1;
10,024✔
1128
            }
1129
            this->update();
10,566✔
1130
            this->i_index += 1;
10,566✔
1131

1132
            return *this;
10,566✔
1133
        }
1134

1135
        string_fragment operator*()
25,908✔
1136
        {
1137
            const auto& ss = this->i_parent;
25,908✔
1138
            int end = this->i_next_pos - ss.ss_str;
25,908✔
1139

1140
            return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end);
25,908✔
1141
        }
1142

1143
        bool operator==(const iterator& other) const
11,114✔
1144
        {
1145
            return (&this->i_parent == &other.i_parent)
11,114✔
1146
                && (this->i_pos == other.i_pos);
11,114✔
1147
        }
1148

1149
        bool operator!=(const iterator& other) const
8,545✔
1150
        {
1151
            return !(*this == other);
8,545✔
1152
        }
1153

1154
        size_t index() const { return this->i_index; }
15,809✔
1155
    };
1156

1157
    iterator begin() { return {*this, this->ss_str}; }
18,410✔
1158

1159
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
11,114✔
1160
};
1161

1162
class w3c_log_format : public log_format {
1163
public:
1164
    static const intern_string_t F_DATE;
1165
    static const intern_string_t F_TIME;
1166

1167
    struct field_def {
1168
        const intern_string_t fd_name;
1169
        logline_value_meta fd_meta;
1170
        logline_value_meta* fd_root_meta{nullptr};
1171
        std::string fd_collator;
1172
        std::optional<size_t> fd_numeric_index;
1173

1174
        explicit field_def(const intern_string_t name)
14✔
1175
            : fd_name(name), fd_meta(intern_string::lookup(sql_safe_ident(
28✔
1176
                                         name.to_string_fragment())),
28✔
1177
                                     value_kind_t::VALUE_TEXT)
14✔
1178
        {
1179
        }
14✔
1180

1181
        field_def(const intern_string_t name, logline_value_meta meta)
59✔
1182
            : fd_name(name), fd_meta(meta)
59✔
1183
        {
1184
        }
59✔
1185

1186
        field_def(size_t col,
9,184✔
1187
                  const char* name,
1188
                  value_kind_t kind,
1189
                  bool ident = false,
1190
                  bool foreign_key = false,
1191
                  std::string coll = "")
1192
            : fd_name(intern_string::lookup(name)),
18,368✔
1193
              fd_meta(
18,368✔
1194
                  intern_string::lookup(sql_safe_ident(string_fragment(name))),
18,368✔
1195
                  kind,
1196
                  logline_value_meta::table_column{col}),
9,184✔
1197
              fd_collator(std::move(coll))
9,184✔
1198
        {
1199
            this->fd_meta.lvm_identifier = ident;
9,184✔
1200
            this->fd_meta.lvm_foreign_key = foreign_key;
9,184✔
1201
        }
9,184✔
1202

1203
        field_def& with_kind(value_kind_t kind,
1204
                             bool identifier = false,
1205
                             const std::string& collator = "")
1206
        {
1207
            this->fd_meta.lvm_kind = kind;
1208
            this->fd_meta.lvm_identifier = identifier;
1209
            this->fd_collator = collator;
1210
            return *this;
1211
        }
1212

1213
        field_def& with_numeric_index(int index)
27✔
1214
        {
1215
            this->fd_numeric_index = index;
27✔
1216
            return *this;
27✔
1217
        }
1218
    };
1219

1220
    static std::unordered_map<const intern_string_t, logline_value_meta>
1221
        FIELD_META;
1222

1223
    struct field_to_struct_t {
1224
        field_to_struct_t(const char* prefix, const char* struct_name)
2,296✔
1225
            : fs_prefix(prefix),
2,296✔
1226
              fs_struct_name(intern_string::lookup(struct_name))
4,592✔
1227
        {
1228
        }
2,296✔
1229

1230
        const char* fs_prefix;
1231
        intern_string_t fs_struct_name;
1232
    };
1233

1234
    static const std::array<field_def, 16>& get_known_fields()
587✔
1235
    {
1236
        static size_t KNOWN_FIELD_INDEX = 0;
1237
        static const std::array<field_def, 16> RETVAL = {
1238
            field_def{
1239
                KNOWN_FIELD_INDEX++,
1240
                "cs-method",
1241
                value_kind_t::VALUE_TEXT,
1242
                true,
1243
            },
1244
            {
1245
                KNOWN_FIELD_INDEX++,
1246
                "c-ip",
1247
                value_kind_t::VALUE_TEXT,
1248
                true,
1249
                false,
1250
                "ipaddress",
1251
            },
1252
            {
1253
                KNOWN_FIELD_INDEX++,
1254
                "cs-bytes",
1255
                value_kind_t::VALUE_INTEGER,
1256
                false,
1257
            },
1258
            {
1259
                KNOWN_FIELD_INDEX++,
1260
                "cs-host",
1261
                value_kind_t::VALUE_TEXT,
1262
                true,
1263
            },
1264
            {
1265
                KNOWN_FIELD_INDEX++,
1266
                "cs-uri-stem",
1267
                value_kind_t::VALUE_TEXT,
1268
                true,
1269
                false,
1270
                "naturalnocase",
1271
            },
1272
            {
1273
                KNOWN_FIELD_INDEX++,
1274
                "cs-uri-query",
1275
                value_kind_t::VALUE_TEXT,
1276
                false,
1277
            },
1278
            {
1279
                KNOWN_FIELD_INDEX++,
1280
                "cs-username",
1281
                value_kind_t::VALUE_TEXT,
1282
                false,
1283
            },
1284
            {
1285
                KNOWN_FIELD_INDEX++,
1286
                "cs-version",
1287
                value_kind_t::VALUE_TEXT,
1288
                true,
1289
            },
1290
            {
1291
                KNOWN_FIELD_INDEX++,
1292
                "s-ip",
1293
                value_kind_t::VALUE_TEXT,
1294
                true,
1295
                false,
1296
                "ipaddress",
1297
            },
1298
            {
1299
                KNOWN_FIELD_INDEX++,
1300
                "s-port",
1301
                value_kind_t::VALUE_INTEGER,
1302
                true,
1303
            },
1304
            {
1305
                KNOWN_FIELD_INDEX++,
1306
                "s-computername",
1307
                value_kind_t::VALUE_TEXT,
1308
                true,
1309
            },
1310
            {
1311
                KNOWN_FIELD_INDEX++,
1312
                "s-sitename",
1313
                value_kind_t::VALUE_TEXT,
1314
                true,
1315
            },
1316
            {
1317
                KNOWN_FIELD_INDEX++,
1318
                "sc-bytes",
1319
                value_kind_t::VALUE_INTEGER,
1320
                false,
1321
            },
1322
            {
1323
                KNOWN_FIELD_INDEX++,
1324
                "sc-status",
1325
                value_kind_t::VALUE_INTEGER,
1326
                false,
1327
                true,
1328
            },
1329
            {
1330
                KNOWN_FIELD_INDEX++,
1331
                "sc-substatus",
1332
                value_kind_t::VALUE_INTEGER,
1333
                false,
1334
            },
1335
            {
1336
                KNOWN_FIELD_INDEX++,
1337
                "time-taken",
1338
                value_kind_t::VALUE_FLOAT,
1339
                false,
1340
            },
1341
        };
1,735✔
1342

1343
        return RETVAL;
587✔
1344
    }
1345

1346
    static const std::array<field_to_struct_t, 4>& get_known_struct_fields()
584✔
1347
    {
1348
        static const std::array<field_to_struct_t, 4> RETVAL = {
1349
            field_to_struct_t{"cs(", "cs_headers"},
1350
            {"sc(", "sc_headers"},
1351
            {"rs(", "rs_headers"},
1352
            {"sr(", "sr_headers"},
1353
        };
584✔
1354

1355
        return RETVAL;
584✔
1356
    }
1357

1358
    w3c_log_format()
699✔
1359
    {
699✔
1360
        this->lf_is_self_describing = true;
699✔
1361
        this->lf_time_ordered = false;
699✔
1362
        this->lf_structured = true;
699✔
1363
    }
699✔
1364

1365
    /**
     * @return wlf_format_name if it is set, otherwise the generic
     *   "w3c_log" name.
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t DEFAULT_NAME
            = intern_string::lookup("w3c_log");

        if (this->wlf_format_name.empty()) {
            return DEFAULT_NAME;
        }

        return this->wlf_format_name;
    }
1371

1372
    void clear() override
12,934✔
1373
    {
1374
        this->log_format::clear();
12,934✔
1375
        this->wlf_time_scanner.clear();
12,934✔
1376
        this->wlf_format_name.clear();
12,934✔
1377
        this->wlf_field_defs.clear();
12,934✔
1378
    }
12,934✔
1379

1380
    /**
     * Parse one line against the field definitions in wlf_field_defs.
     *
     * Tokens are matched to field definitions by position:
     *  - A token that starts with '#' turns the whole line into a
     *    LEVEL_IGNORE entry.  If that token is "#Date:", the rest of the
     *    line is scanned and, on success, used as the base time of both
     *    time scanners.
     *  - Date and time columns are scanned separately and merged into one
     *    timestamp afterwards.
     *  - An "sc-status" value whose first character is '4' or greater
     *    raises the level to LEVEL_ERROR.
     *  - Integer and float columns with a numeric index are added to the
     *    value statistics when they parse as a double.
     *  - A line with more tokens than field definitions gets LEVEL_INVALID
     *    and the remaining tokens are not examined.
     *
     * @param dst Receives the logline entry for this line.
     * @param li Supplies the file offset recorded in the new entry.
     * @param sbr The line contents.
     * @return A scan_match when an entry was appended; a scan_no_match when
     *   no time column was parsed.
     */
    scan_result_t scan_int(std::vector<logline>& dst,
                           const line_info& li,
                           shared_buffer_ref& sbr)
    {
        static const intern_string_t F_DATE_LOCAL
            = intern_string::lookup("date-local");
        static const intern_string_t F_DATE_UTC
            = intern_string::lookup("date-UTC");
        static const intern_string_t F_TIME_LOCAL
            = intern_string::lookup("time-local");
        static const intern_string_t F_TIME_UTC
            = intern_string::lookup("time-UTC");
        static const intern_string_t F_STATUS_CODE
            = intern_string::lookup("sc-status");

        ws_separated_string columns(sbr.get_data(), sbr.length());
        timeval date_tv{0, 0};
        timeval time_tv{0, 0};
        exttm date_tm;
        exttm time_tm;
        bool have_date = false;
        bool have_time = false;
        log_level_t level = LEVEL_INFO;

        for (auto col = columns.begin(); col != columns.end(); ++col) {
            const auto col_index = col.index();

            if (col_index >= this->wlf_field_defs.size()) {
                level = LEVEL_INVALID;
                break;
            }

            const auto& def = this->wlf_field_defs[col_index];
            string_fragment cell = *col;

            if (cell.startswith("#")) {
                if (cell == "#Date:") {
                    auto rest_opt
                        = sbr.to_string_fragment().consume_n(cell.length());

                    if (rest_opt) {
                        auto rest = rest_opt.value().trim();
                        date_time_scanner header_scanner;
                        exttm base_tm;
                        timeval base_tv;

                        if (header_scanner.scan(rest.data(),
                                                rest.length(),
                                                nullptr,
                                                &base_tm,
                                                base_tv))
                        {
                            this->lf_date_time.set_base_time(base_tv.tv_sec,
                                                             base_tm.et_tm);
                            this->wlf_time_scanner.set_base_time(
                                base_tv.tv_sec, base_tm.et_tm);
                        }
                    }
                }
                dst.emplace_back(li.li_file_range.fr_offset,
                                 std::chrono::microseconds{0},
                                 LEVEL_IGNORE,
                                 0);
                return scan_match{2000};
            }

            // Drop surrounding quotes, spaces and tabs before interpreting
            // the value.
            cell = cell.trim("\" \t");

            const auto& col_name = def.fd_name;
            const bool is_date_col = F_DATE == col_name
                || F_DATE_LOCAL == col_name || F_DATE_UTC == col_name;

            if (is_date_col) {
                if (this->lf_date_time.scan(cell.data(),
                                            cell.length(),
                                            nullptr,
                                            &date_tm,
                                            date_tv))
                {
                    this->lf_timestamp_flags |= date_tm.et_flags;
                    have_date = true;
                }
            } else if (F_TIME == col_name || F_TIME_LOCAL == col_name
                       || F_TIME_UTC == col_name)
            {
                if (this->wlf_time_scanner.scan(cell.data(),
                                                cell.length(),
                                                nullptr,
                                                &time_tm,
                                                time_tv))
                {
                    this->lf_timestamp_flags |= time_tm.et_flags;
                    have_time = true;
                }
            } else if (F_STATUS_CODE == col_name) {
                if (!cell.empty() && cell[0] >= '4') {
                    level = LEVEL_ERROR;
                }
            }

            if (def.fd_numeric_index) {
                const auto kind = def.fd_meta.lvm_kind;

                if (kind == value_kind_t::VALUE_INTEGER
                    || kind == value_kind_t::VALUE_FLOAT)
                {
                    // Integers are also parsed as doubles for the stats.
                    auto parsed
                        = scn::scan_value<double>(cell.to_string_view());

                    if (parsed) {
                        this->lf_value_stats[def.fd_numeric_index.value()]
                            .add_value(parsed->value());
                    }
                }
            }
        }

        if (!have_time) {
            return scan_no_match{"no header found"};
        }

        // Start from the parsed time and overlay the calendar fields of the
        // parsed date, if there was one.
        auto line_tm = time_tm;

        if (have_date) {
            line_tm.et_tm.tm_year = date_tm.et_tm.tm_year;
            line_tm.et_tm.tm_mday = date_tm.et_tm.tm_mday;
            line_tm.et_tm.tm_mon = date_tm.et_tm.tm_mon;
            line_tm.et_tm.tm_wday = date_tm.et_tm.tm_wday;
            line_tm.et_tm.tm_yday = date_tm.et_tm.tm_yday;
        }

        auto line_tv = line_tm.to_timeval();

        if (!this->lf_specialized) {
            // Until the format is specialized, every entry already in dst
            // is marked as ignored.
            for (auto& earlier : dst) {
                earlier.set_ignore(true);
            }
        }
        dst.emplace_back(li.li_file_range.fr_offset, line_tv, level, 0);

        return scan_match{2000};
    }
1508

1509
    scan_result_t scan(logfile& lf,
10,421✔
1510
                       std::vector<logline>& dst,
1511
                       const line_info& li,
1512
                       shared_buffer_ref& sbr,
1513
                       scan_batch_context& sbc) override
1514
    {
1515
        static const auto* W3C_LOG_NAME = intern_string::lookup("w3c_log");
11,569✔
1516
        static const auto* X_FIELDS_NAME = intern_string::lookup("x_fields");
11,569✔
1517
        static const auto& KNOWN_FIELDS = get_known_fields();
10,421✔
1518
        static const auto& KNOWN_STRUCT_FIELDS = get_known_struct_fields();
10,421✔
1519
        static auto X_FIELDS_IDX = 0;
1520

1521
        if (li.li_partial) {
10,421✔
1522
            return scan_incomplete{};
17✔
1523
        }
1524

1525
        if (dst.empty()) {
10,404✔
1526
            auto file_options = lf.get_file_options();
1,068✔
1527

1528
            if (file_options) {
1,068✔
1529
                this->lf_date_time.dts_default_zone
1530
                    = file_options->second.fo_default_zone.pp_value;
53✔
1531
            } else {
1532
                this->lf_date_time.dts_default_zone = nullptr;
1,015✔
1533
            }
1534
        }
1,068✔
1535

1536
        if (!this->wlf_format_name.empty()) {
10,404✔
1537
            return this->scan_int(dst, li, sbr);
296✔
1538
        }
1539

1540
        if (dst.empty() || dst.size() > 20 || sbr.empty()
19,148✔
1541
            || sbr.get_data()[0] == '#')
19,148✔
1542
        {
1543
            return scan_no_match{"no header found"};
7,591✔
1544
        }
1545

1546
        this->clear();
2,517✔
1547

1548
        for (auto line_iter = dst.begin(); line_iter != dst.end(); ++line_iter)
20,379✔
1549
        {
1550
            auto next_read_result = lf.read_line(line_iter);
17,862✔
1551

1552
            if (next_read_result.isErr()) {
17,862✔
UNCOV
1553
                return scan_no_match{"unable to read first line"};
×
1554
            }
1555

1556
            auto line = next_read_result.unwrap();
17,862✔
1557
            ws_separated_string ss(line.get_data(), line.length());
17,862✔
1558
            auto iter = ss.begin();
17,862✔
1559
            const auto directive = *iter;
17,862✔
1560

1561
            if (directive.empty() || directive[0] != '#') {
17,862✔
1562
                continue;
15,293✔
1563
            }
1564

1565
            ++iter;
2,569✔
1566
            if (iter == ss.end()) {
2,569✔
1567
                continue;
41✔
1568
            }
1569

1570
            if (directive == "#Date:") {
2,528✔
1571
                date_time_scanner dts;
8✔
1572
                struct exttm tm;
8✔
1573
                struct timeval tv;
1574

1575
                if (dts.scan(line.get_data_at(directive.length() + 1),
8✔
1576
                             line.length() - directive.length() - 1,
8✔
1577
                             nullptr,
1578
                             &tm,
1579
                             tv))
1580
                {
1581
                    this->lf_date_time.set_base_time(tv.tv_sec, tm.et_tm);
7✔
1582
                    this->wlf_time_scanner.set_base_time(tv.tv_sec, tm.et_tm);
7✔
1583
                }
1584
            } else if (directive == "#Fields:" && this->wlf_field_defs.empty())
2,520✔
1585
            {
1586
                int numeric_count = 0;
15✔
1587

1588
                do {
1589
                    auto sf = (*iter).trim(")");
142✔
1590

1591
                    auto field_iter = std::find_if(
426✔
1592
                        begin(KNOWN_FIELDS),
1593
                        end(KNOWN_FIELDS),
1594
                        [&sf](auto elem) { return sf == elem.fd_name; });
1,676✔
1595
                    if (field_iter != end(KNOWN_FIELDS)) {
284✔
1596
                        this->wlf_field_defs.emplace_back(*field_iter);
69✔
1597
                        auto& fd = this->wlf_field_defs.back();
69✔
1598
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
69✔
1599
                        if (common_iter == FIELD_META.end()) {
69✔
1600
                            auto emp_res = FIELD_META.emplace(
68✔
1601
                                fd.fd_meta.lvm_name, fd.fd_meta);
68✔
1602
                            common_iter = emp_res.first;
68✔
1603
                        }
1604
                        fd.fd_root_meta = &common_iter->second;
69✔
1605
                    } else if (sf.is_one_of("date", "time")) {
73✔
1606
                        this->wlf_field_defs.emplace_back(
28✔
1607
                            intern_string::lookup(sf));
14✔
1608
                        auto& fd = this->wlf_field_defs.back();
14✔
1609
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
14✔
1610
                        if (common_iter == FIELD_META.end()) {
14✔
1611
                            auto emp_res = FIELD_META.emplace(
13✔
1612
                                fd.fd_meta.lvm_name, fd.fd_meta);
13✔
1613
                            common_iter = emp_res.first;
13✔
1614
                        }
1615
                        fd.fd_root_meta = &common_iter->second;
14✔
1616
                    } else {
1617
                        const auto fs_iter = std::find_if(
177✔
1618
                            begin(KNOWN_STRUCT_FIELDS),
1619
                            end(KNOWN_STRUCT_FIELDS),
1620
                            [&sf](auto elem) {
197✔
1621
                                return sf.startswith(elem.fs_prefix);
197✔
1622
                            });
1623
                        if (fs_iter != end(KNOWN_STRUCT_FIELDS)) {
118✔
1624
                            const intern_string_t field_name
1625
                                = intern_string::lookup(sf.substr(3));
13✔
1626
                            this->wlf_field_defs.emplace_back(
13✔
1627
                                field_name,
1628
                                logline_value_meta(
26✔
1629
                                    field_name,
1630
                                    value_kind_t::VALUE_TEXT,
UNCOV
1631
                                    logline_value_meta::table_column{
×
1632
                                        KNOWN_FIELDS.size() + 1
13✔
1633
                                        + std::distance(
39✔
1634
                                            begin(KNOWN_STRUCT_FIELDS),
1635
                                            fs_iter)},
1636
                                    this)
26✔
1637
                                    .with_struct_name(fs_iter->fs_struct_name));
1638
                        } else {
1639
                            const intern_string_t field_name
1640
                                = intern_string::lookup(sf);
46✔
1641
                            this->wlf_field_defs.emplace_back(
46✔
1642
                                field_name,
1643
                                logline_value_meta(
92✔
1644
                                    field_name,
1645
                                    value_kind_t::VALUE_TEXT,
UNCOV
1646
                                    logline_value_meta::table_column{
×
1647
                                        KNOWN_FIELDS.size() + X_FIELDS_IDX},
92✔
1648
                                    this)
92✔
1649
                                    .with_struct_name(X_FIELDS_NAME));
1650
                        }
1651
                    }
1652
                    auto& fd = this->wlf_field_defs.back();
142✔
1653
                    fd.fd_meta.lvm_format = std::make_optional(this);
142✔
1654
                    switch (fd.fd_meta.lvm_kind) {
142✔
1655
                        case value_kind_t::VALUE_FLOAT:
27✔
1656
                        case value_kind_t::VALUE_INTEGER:
1657
                            fd.with_numeric_index(numeric_count);
27✔
1658
                            numeric_count += 1;
27✔
1659
                            break;
27✔
1660
                        default:
115✔
1661
                            break;
115✔
1662
                    }
1663

1664
                    ++iter;
142✔
1665
                } while (iter != ss.end());
142✔
1666

1667
                this->wlf_format_name = W3C_LOG_NAME;
15✔
1668
                this->lf_value_stats.resize(numeric_count);
15✔
1669
            }
1670
        }
33,196✔
1671

1672
        if (!this->wlf_format_name.empty() && !this->wlf_field_defs.empty()) {
2,517✔
1673
            return this->scan_int(dst, li, sbr);
15✔
1674
        }
1675

1676
        this->wlf_format_name.clear();
2,502✔
1677
        this->lf_value_stats.clear();
2,502✔
1678

1679
        return scan_no_match{"no header found"};
2,502✔
1680
    }
1681

1682
    void annotate(logfile* lf,
237✔
1683
                  uint64_t line_number,
1684
                  string_attrs_t& sa,
1685
                  logline_value_vector& values,
1686
                  bool annotate_module) const override
1687
    {
1688
        auto& sbr = values.lvv_sbr;
237✔
1689
        ws_separated_string ss(sbr.get_data(), sbr.length());
237✔
1690
        std::optional<line_range> date_lr;
237✔
1691
        std::optional<line_range> time_lr;
237✔
1692

1693
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
4,062✔
1694
            auto sf = *iter;
3,825✔
1695

1696
            if (iter.index() >= this->wlf_field_defs.size()) {
3,825✔
UNCOV
1697
                sa.emplace_back(line_range{sf.sf_begin, -1},
×
UNCOV
1698
                                SA_INVALID.value("extra fields detected"));
×
UNCOV
1699
                return;
×
1700
            }
1701

1702
            const auto& fd = this->wlf_field_defs[iter.index()];
3,825✔
1703

1704
            if (sf == "-") {
3,825✔
1705
                sf.invalidate();
659✔
1706
            }
1707

1708
            auto lr = line_range(sf.sf_begin, sf.sf_end);
3,825✔
1709

1710
            if (lr.is_valid()) {
3,825✔
1711
                if (fd.fd_meta.lvm_name == F_DATE) {
3,166✔
1712
                    date_lr = lr;
215✔
1713
                } else if (fd.fd_meta.lvm_name == F_TIME) {
2,951✔
1714
                    time_lr = lr;
229✔
1715
                }
1716
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
3,166✔
1717
                if (sf.startswith("\"")) {
3,166✔
1718
                    auto& meta = values.lvv_values.back().lv_meta;
28✔
1719

1720
                    if (meta.lvm_kind == value_kind_t::VALUE_TEXT) {
28✔
1721
                        meta.lvm_kind = value_kind_t::VALUE_W3C_QUOTED;
26✔
1722
                    } else {
1723
                        meta.lvm_kind = value_kind_t::VALUE_NULL;
2✔
1724
                    }
1725
                }
1726
            } else {
1727
                values.lvv_values.emplace_back(fd.fd_meta);
659✔
1728
            }
1729
            if (fd.fd_root_meta != nullptr) {
3,825✔
1730
                values.lvv_values.back().lv_meta.lvm_user_hidden
3,128✔
1731
                    = fd.fd_root_meta->lvm_user_hidden;
3,128✔
1732
            }
1733
        }
1734
        if (time_lr) {
237✔
1735
            auto ts_lr = time_lr.value();
229✔
1736
            if (date_lr) {
229✔
1737
                if (date_lr->lr_end + 1 == time_lr->lr_start) {
214✔
1738
                    ts_lr.lr_start = date_lr->lr_start;
213✔
1739
                    ts_lr.lr_end = time_lr->lr_end;
213✔
1740
                }
1741
            }
1742

1743
            sa.emplace_back(ts_lr, L_TIMESTAMP.value());
229✔
1744
        }
1745
        log_format::annotate(lf, line_number, sa, values, annotate_module);
237✔
1746
    }
1747

UNCOV
1748
    /**
     * Look up the statistics slot for a named field.
     *
     * @param name The field name to search for in wlf_field_defs.
     * @return A pointer into lf_value_stats for the first definition with
     *   a matching name, or nullptr if there is no such definition or it
     *   has no numeric index.
     */
    const logline_value_stats* stats_for_value(
        const intern_string_t& name) const override
    {
        const auto def_iter = std::find_if(
            this->wlf_field_defs.begin(),
            this->wlf_field_defs.end(),
            [&name](const auto& def) { return def.fd_meta.lvm_name == name; });

        if (def_iter == this->wlf_field_defs.end()) {
            return nullptr;
        }
        if (!def_iter->fd_numeric_index) {
            return nullptr;
        }

        return &this->lf_value_stats[def_iter->fd_numeric_index.value()];
    }
1766

UNCOV
1767
    bool hide_field(const intern_string_t field_name, bool val) override
×
1768
    {
UNCOV
1769
        if (field_name == LOG_TIME_STR) {
×
UNCOV
1770
            auto date_iter = FIELD_META.find(F_DATE);
×
UNCOV
1771
            auto time_iter = FIELD_META.find(F_TIME);
×
UNCOV
1772
            if (date_iter == FIELD_META.end() || time_iter == FIELD_META.end())
×
1773
            {
UNCOV
1774
                return false;
×
1775
            }
UNCOV
1776
            date_iter->second.lvm_user_hidden = val;
×
UNCOV
1777
            time_iter->second.lvm_user_hidden = val;
×
UNCOV
1778
            return true;
×
1779
        }
1780

UNCOV
1781
        auto fd_iter = FIELD_META.find(field_name);
×
UNCOV
1782
        if (fd_iter == FIELD_META.end()) {
×
UNCOV
1783
            return false;
×
1784
        }
1785

UNCOV
1786
        fd_iter->second.lvm_user_hidden = val;
×
1787

UNCOV
1788
        return true;
×
1789
    }
1790

1791
    std::map<intern_string_t, logline_value_meta> get_field_states() override
119✔
1792
    {
1793
        std::map<intern_string_t, logline_value_meta> retval;
119✔
1794

1795
        for (const auto& fd : FIELD_META) {
119✔
UNCOV
1796
            retval.emplace(fd.first, fd.second);
×
1797
        }
1798

1799
        return retval;
119✔
UNCOV
1800
    }
×
1801

1802
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
10✔
1803
    {
1804
        auto retval = std::make_shared<w3c_log_format>(*this);
10✔
1805

1806
        retval->lf_specialized = true;
10✔
1807
        return retval;
20✔
1808
    }
10✔
1809

1810
    class w3c_log_table : public log_format_vtab_impl {
1811
    public:
1812
        explicit w3c_log_table(const w3c_log_format& format)
7✔
1813
            : log_format_vtab_impl(format), wlt_format(format)
7✔
1814
        {
1815
        }
7✔
1816

1817
        void get_columns(std::vector<vtab_column>& cols) const override
10✔
1818
        {
1819
            for (const auto& fd : get_known_fields()) {
170✔
1820
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
160✔
1821
                    fd.fd_meta.lvm_kind);
160✔
1822

1823
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
160✔
1824
                                  type_pair.first,
1825
                                  fd.fd_collator,
160✔
1826
                                  false,
320✔
1827
                                  "",
1828
                                  type_pair.second);
1829
            }
1830
            cols.emplace_back("x_fields");
10✔
1831
            cols.back().with_comment(
20✔
1832
                "A JSON-object that contains fields that are not first-class "
1833
                "columns");
1834
            for (const auto& fs : get_known_struct_fields()) {
50✔
1835
                cols.emplace_back(fs.fs_struct_name.to_string());
40✔
1836
            }
1837
        };
10✔
1838

1839
        void get_foreign_keys(
3✔
1840
            std::unordered_set<std::string>& keys_inout) const override
1841
        {
1842
            this->log_vtab_impl::get_foreign_keys(keys_inout);
3✔
1843

1844
            for (const auto& fd : get_known_fields()) {
51✔
1845
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
48✔
1846
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
30✔
1847
                }
1848
            }
1849
        }
3✔
1850

1851
        const w3c_log_format& wlt_format;
1852
    };
1853

1854
    static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>&
1855
    get_tables()
7✔
1856
    {
1857
        static std::map<intern_string_t, std::shared_ptr<w3c_log_table>> retval;
7✔
1858

1859
        return retval;
7✔
1860
    }
1861

1862
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
586✔
1863
    {
1864
        if (this->wlf_format_name.empty()) {
586✔
1865
            return nullptr;
579✔
1866
        }
1867

1868
        std::shared_ptr<w3c_log_table> retval = nullptr;
7✔
1869

1870
        auto& tables = get_tables();
7✔
1871
        const auto iter = tables.find(this->wlf_format_name);
7✔
1872
        if (iter == tables.end()) {
7✔
1873
            retval = std::make_shared<w3c_log_table>(*this);
7✔
1874
            tables[this->wlf_format_name] = retval;
7✔
1875
        }
1876

1877
        return retval;
7✔
1878
    }
7✔
1879

1880
    void get_subline(const logline& ll,
344✔
1881
                     shared_buffer_ref& sbr,
1882
                     subline_options opts) override
1883
    {
1884
    }
344✔
1885

1886
    date_time_scanner wlf_time_scanner;
1887
    intern_string_t wlf_format_name;
1888
    std::vector<field_def> wlf_field_defs;
1889
};
1890

1891
// Storage for the static map of field metadata keyed by field name.  Within
// w3c_log_format it is filled while the "#Fields:" header is parsed and its
// lvm_user_hidden flags are changed by hide_field().
std::unordered_map<const intern_string_t, logline_value_meta>
1892
    w3c_log_format::FIELD_META;
1893

1894
// Interned names of the "date" and "time" fields, which annotate() and
// hide_field() look up specifically.
const intern_string_t w3c_log_format::F_DATE = intern_string::lookup("date");
1895
const intern_string_t w3c_log_format::F_TIME = intern_string::lookup("time");
1896

1897
struct logfmt_pair_handler {
1898
    explicit logfmt_pair_handler(date_time_scanner& dts) : lph_dt_scanner(dts)
10,417✔
1899
    {
1900
    }
10,417✔
1901

1902
    log_format::scan_result_t process_value(const string_fragment& value_frag)
3,520✔
1903
    {
1904
        if (this->lph_key_frag.is_one_of(
3,520✔
1905
                "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
1906
        {
1907
            if (!this->lph_dt_scanner.scan(value_frag.data(),
31✔
1908
                                           value_frag.length(),
31✔
1909
                                           nullptr,
1910
                                           &this->lph_time_tm,
1911
                                           this->lph_tv))
31✔
1912
            {
UNCOV
1913
                return log_format::scan_no_match{
×
UNCOV
1914
                    "timestamp value did not parse correctly"};
×
1915
            }
1916
            char buf[1024];
1917
            this->lph_dt_scanner.ftime(
31✔
1918
                buf, sizeof(buf), nullptr, this->lph_time_tm);
31✔
1919
            this->lph_found_time = true;
31✔
1920
        } else if (this->lph_key_frag.is_one_of("level"_frag, "lvl"_frag)) {
3,489✔
1921
            this->lph_level
1922
                = string2level(value_frag.data(), value_frag.length());
40✔
1923
        }
1924
        return log_format::scan_match{};
3,520✔
1925
    }
1926

1927
    date_time_scanner& lph_dt_scanner;
1928
    bool lph_found_time{false};
1929
    exttm lph_time_tm;
1930
    timeval lph_tv{0, 0};
1931
    log_level_t lph_level{log_level_t::LEVEL_INFO};
1932
    string_fragment lph_key_frag{""};
1933
};
1934

1935
class logfmt_format : public log_format {
1936
public:
1937
    const intern_string_t get_name() const override
12,911✔
1938
    {
1939
        const static intern_string_t NAME = intern_string::lookup("logfmt_log");
14,297✔
1940

1941
        return NAME;
12,911✔
1942
    }
1943

1944
    class logfmt_log_table : public log_format_vtab_impl {
1945
    public:
1946
        logfmt_log_table(const log_format& format)
579✔
1947
            : log_format_vtab_impl(format)
579✔
1948
        {
1949
        }
579✔
1950

1951
        void get_columns(std::vector<vtab_column>& cols) const override
580✔
1952
        {
1953
            static const auto FIELDS = std::string("fields");
1,738✔
1954

1955
            cols.emplace_back(FIELDS);
580✔
1956
        }
580✔
1957
    };
1958

1959
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
579✔
1960
    {
1961
        static auto retval = std::make_shared<logfmt_log_table>(*this);
579✔
1962

1963
        return retval;
579✔
1964
    }
1965

1966
    scan_result_t scan(logfile& lf,
10,417✔
1967
                       std::vector<logline>& dst,
1968
                       const line_info& li,
1969
                       shared_buffer_ref& sbr,
1970
                       scan_batch_context& sbc) override
1971
    {
1972
        auto p = logfmt::parser(sbr.to_string_fragment());
10,417✔
1973
        scan_result_t retval = scan_no_match{};
10,417✔
1974
        bool done = false;
10,417✔
1975
        logfmt_pair_handler lph(this->lf_date_time);
10,417✔
1976

1977
        if (dst.empty()) {
10,417✔
1978
            auto file_options = lf.get_file_options();
1,080✔
1979

1980
            if (file_options) {
1,080✔
1981
                this->lf_date_time.dts_default_zone
1982
                    = file_options->second.fo_default_zone.pp_value;
53✔
1983
            } else {
1984
                this->lf_date_time.dts_default_zone = nullptr;
1,027✔
1985
            }
1986
        }
1,080✔
1987

1988
        while (!done) {
24,354✔
1989
            auto parse_result = p.step();
13,937✔
1990

1991
            auto value_res = parse_result.match(
UNCOV
1992
                [&done](const logfmt::parser::end_of_input&) -> scan_result_t {
×
1993
                    done = true;
215✔
1994
                    return scan_match{};
215✔
1995
                },
UNCOV
1996
                [&lph](const logfmt::parser::kvpair& kvp) -> scan_result_t {
×
1997
                    lph.lph_key_frag = kvp.first;
3,520✔
1998

1999
                    return kvp.second.match(
UNCOV
2000
                        [](const logfmt::parser::bool_value& bv)
×
2001
                            -> scan_result_t { return scan_match{}; },
×
2002
                        [&lph](const logfmt::parser::float_value& fv)
×
2003
                            -> scan_result_t {
2004
                            return lph.process_value(fv.fv_str_value);
5✔
2005
                        },
UNCOV
2006
                        [&lph](const logfmt::parser::int_value& iv)
×
2007
                            -> scan_result_t {
2008
                            return lph.process_value(iv.iv_str_value);
108✔
2009
                        },
2010
                        [&lph](const logfmt::parser::quoted_value& qv)
×
2011
                            -> scan_result_t {
2012
                            auto_mem<yajl_handle_t> handle(yajl_free);
313✔
2013
                            yajl_callbacks cb;
2014
                            scan_result_t retval;
313✔
2015

2016
                            memset(&cb, 0, sizeof(cb));
313✔
2017
                            handle = yajl_alloc(&cb, nullptr, &lph);
313✔
2018
                            cb.yajl_string = +[](void* ctx,
626✔
2019
                                                 const unsigned char* str,
2020
                                                 size_t len,
2021
                                                 yajl_string_props_t*) -> int {
2022
                                auto& lph = *((logfmt_pair_handler*) ctx);
313✔
2023
                                string_fragment value_frag{str, 0, (int) len};
313✔
2024

2025
                                auto value_res = lph.process_value(value_frag);
313✔
2026
                                return value_res.is<scan_match>();
626✔
2027
                            };
626✔
2028

2029
                            if (yajl_parse(
313✔
2030
                                    handle,
2031
                                    (const unsigned char*) qv.qv_value.data(),
313✔
2032
                                    qv.qv_value.length())
313✔
2033
                                    != yajl_status_ok
2034
                                || yajl_complete_parse(handle)
313✔
2035
                                    != yajl_status_ok)
2036
                            {
2037
                                log_debug("json parsing failed");
×
2038
                                string_fragment unq_frag{
2039
                                    qv.qv_value.sf_string,
×
UNCOV
2040
                                    qv.qv_value.sf_begin + 1,
×
2041
                                    qv.qv_value.sf_end - 1,
×
2042
                                };
2043

UNCOV
2044
                                return lph.process_value(unq_frag);
×
2045
                            }
2046

2047
                            return scan_match{};
313✔
2048
                        },
313✔
2049
                        [&lph](const logfmt::parser::unquoted_value& uv)
3,520✔
2050
                            -> scan_result_t {
2051
                            return lph.process_value(uv.uv_value);
3,094✔
2052
                        });
7,040✔
2053
                },
UNCOV
2054
                [](const logfmt::parser::error& err) -> scan_result_t {
×
2055
                    // log_error("logfmt parse error: %s", err.e_msg.c_str());
2056
                    return scan_no_match{};
10,202✔
2057
                });
13,937✔
2058
            if (value_res.is<scan_no_match>()) {
13,937✔
2059
                retval = value_res;
10,202✔
2060
                done = true;
10,202✔
2061
            }
2062
        }
13,937✔
2063

2064
        if (lph.lph_found_time) {
10,417✔
2065
            this->lf_timestamp_flags = lph.lph_time_tm.et_flags;
31✔
2066
            dst.emplace_back(
31✔
2067
                li.li_file_range.fr_offset, lph.lph_tv, lph.lph_level);
31✔
2068
            retval = scan_match{2000};
31✔
2069
        }
2070

2071
        return retval;
20,834✔
UNCOV
2072
    }
×
2073

2074
    void annotate(logfile* lf,
11✔
2075
                  uint64_t line_number,
2076
                  string_attrs_t& sa,
2077
                  logline_value_vector& values,
2078
                  bool annotate_module) const override
2079
    {
2080
        static const intern_string_t FIELDS_NAME
2081
            = intern_string::lookup("fields");
15✔
2082

2083
        auto& sbr = values.lvv_sbr;
11✔
2084
        auto p = logfmt::parser(sbr.to_string_fragment());
11✔
2085
        auto done = false;
11✔
2086
        auto found_body = false;
11✔
2087

2088
        while (!done) {
95✔
2089
            auto parse_result = p.step();
84✔
2090

2091
            done = parse_result.match(
168✔
2092
                [](const logfmt::parser::end_of_input&) { return true; },
11✔
UNCOV
2093
                [this, &sa, &values, &found_body](
×
2094
                    const logfmt::parser::kvpair& kvp) {
2095
                    auto value_frag = kvp.second.match(
73✔
UNCOV
2096
                        [this, &kvp, &values](
×
2097
                            const logfmt::parser::bool_value& bv) {
UNCOV
2098
                            auto lvm = logline_value_meta{intern_string::lookup(
×
UNCOV
2099
                                                              kvp.first),
×
2100
                                                          value_kind_t::
2101
                                                              VALUE_INTEGER,
2102
                                                          logline_value_meta::
UNCOV
2103
                                                              table_column{0},
×
UNCOV
2104
                                                          (log_format*) this}
×
UNCOV
2105
                                           .with_struct_name(FIELDS_NAME);
×
UNCOV
2106
                            values.lvv_values.emplace_back(lvm, bv.bv_value);
×
2107

UNCOV
2108
                            return bv.bv_str_value;
×
UNCOV
2109
                        },
×
UNCOV
2110
                        [this, &kvp, &values](
×
2111
                            const logfmt::parser::int_value& iv) {
UNCOV
2112
                            auto lvm = logline_value_meta{intern_string::lookup(
×
UNCOV
2113
                                                              kvp.first),
×
2114
                                                          value_kind_t::
2115
                                                              VALUE_INTEGER,
2116
                                                          logline_value_meta::
UNCOV
2117
                                                              table_column{0},
×
UNCOV
2118
                                                          (log_format*) this}
×
UNCOV
2119
                                           .with_struct_name(FIELDS_NAME);
×
UNCOV
2120
                            values.lvv_values.emplace_back(lvm, iv.iv_value);
×
2121

UNCOV
2122
                            return iv.iv_str_value;
×
UNCOV
2123
                        },
×
2124
                        [this, &kvp, &values](
73✔
2125
                            const logfmt::parser::float_value& fv) {
UNCOV
2126
                            auto lvm = logline_value_meta{intern_string::lookup(
×
UNCOV
2127
                                                              kvp.first),
×
2128
                                                          value_kind_t::
2129
                                                              VALUE_INTEGER,
2130
                                                          logline_value_meta::
UNCOV
2131
                                                              table_column{0},
×
UNCOV
2132
                                                          (log_format*) this}
×
UNCOV
2133
                                           .with_struct_name(FIELDS_NAME);
×
UNCOV
2134
                            values.lvv_values.emplace_back(lvm, fv.fv_value);
×
2135

UNCOV
2136
                            return fv.fv_str_value;
×
UNCOV
2137
                        },
×
UNCOV
2138
                        [](const logfmt::parser::quoted_value& qv) {
×
2139
                            return qv.qv_value;
24✔
2140
                        },
UNCOV
2141
                        [](const logfmt::parser::unquoted_value& uv) {
×
2142
                            return uv.uv_value;
49✔
2143
                        });
2144
                    auto value_lr
2145
                        = line_range{value_frag.sf_begin, value_frag.sf_end};
73✔
2146

2147
                    auto known_field = false;
73✔
2148
                    if (kvp.first.is_one_of(
73✔
2149
                            "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
2150
                    {
2151
                        sa.emplace_back(value_lr, L_TIMESTAMP.value());
11✔
2152
                        known_field = true;
11✔
2153
                    } else if (kvp.first.is_one_of("level"_frag, "lvl"_frag)) {
62✔
2154
                        sa.emplace_back(value_lr, L_LEVEL.value());
11✔
2155
                        known_field = true;
11✔
2156
                    } else if (kvp.first.is_one_of("msg"_frag, "message"_frag))
51✔
2157
                    {
2158
                        sa.emplace_back(value_lr, SA_BODY.value());
11✔
2159
                        found_body = true;
11✔
2160
                    } else if (kvp.second.is<logfmt::parser::quoted_value>()
40✔
2161
                               || kvp.second
78✔
2162
                                      .is<logfmt::parser::unquoted_value>())
38✔
2163
                    {
2164
                        auto lvm
2165
                            = logline_value_meta{intern_string::lookup(
160✔
2166
                                                     kvp.first),
40✔
2167
                                                 value_frag.startswith("\"")
40✔
2168
                                                     ? value_kind_t::VALUE_JSON
2169
                                                     : value_kind_t::VALUE_TEXT,
2170
                                                 logline_value_meta::
2171
                                                     table_column{0},
40✔
2172
                                                 (log_format*) this}
80✔
2173
                                  .with_struct_name(FIELDS_NAME);
40✔
2174
                        values.lvv_values.emplace_back(lvm, value_frag);
40✔
2175
                    }
40✔
2176
                    if (known_field) {
73✔
2177
                        auto key_with_eq = kvp.first;
22✔
2178
                        key_with_eq.sf_end += 1;
22✔
2179
                        sa.emplace_back(to_line_range(key_with_eq),
22✔
2180
                                        SA_REPLACED.value());
44✔
2181
                    } else {
2182
                        sa.emplace_back(to_line_range(kvp.first),
51✔
2183
                                        VC_ROLE.value(role_t::VCR_OBJECT_KEY));
102✔
2184
                    }
2185
                    return false;
73✔
2186
                },
2187
                [line_number, &sbr](const logfmt::parser::error& err) {
84✔
UNCOV
2188
                    log_error("bad line %.*s", sbr.length(), sbr.get_data());
×
UNCOV
2189
                    log_error("%lld:logfmt parse error: %s",
×
2190
                              line_number,
2191
                              err.e_msg.c_str());
UNCOV
2192
                    return true;
×
2193
                });
2194
        }
84✔
2195

2196
        if (!found_body) {
11✔
UNCOV
2197
            sa.emplace_back(line_range::empty_at(sbr.length()),
×
UNCOV
2198
                            SA_BODY.value());
×
2199
        }
2200

2201
        log_format::annotate(lf, line_number, sa, values, annotate_module);
11✔
2202
    }
11✔
2203

2204
    std::shared_ptr<log_format> specialized(int fmt_lock) override
5✔
2205
    {
2206
        auto retval = std::make_shared<logfmt_format>(*this);
5✔
2207

2208
        retval->lf_specialized = true;
5✔
2209
        return retval;
10✔
2210
    }
5✔
2211
};
2212

2213
// Binds the log_format implementations listed below through the injector.
// NOTE(review): the order of the add<>() calls may determine the order in
// which formats are tried -- confirm before reordering.
static auto format_binder = injector::bind_multiple<log_format>()
2214
                                .add<logfmt_format>()
2215
                                .add<bro_log_format>()
2216
                                .add<w3c_log_format>()
2217
                                .add<generic_log_format>()
2218
                                .add<piper_log_format>();
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc