• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 19243988760-2657

10 Nov 2025 07:37PM UTC coverage: 68.747% (-0.3%) from 69.055%
19243988760-2657

push

github

tstack
[logfile] lay groundwork for bounding log file times

Related to #1188

308 of 655 new or added lines in 35 files covered. (47.02%)

30 existing lines in 7 files now uncovered.

50645 of 73669 relevant lines covered (68.75%)

430651.53 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.07
/src/log_format_impls.cc
1
/**
2
 * Copyright (c) 2007-2017, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file log_format_impls.cc
30
 */
31

32
#include <algorithm>
33
#include <chrono>
34
#include <memory>
35
#include <utility>
36

37
#include "log_format.hh"
38

39
#include <stdio.h>
40

41
#include "base/injector.bind.hh"
42
#include "base/opt_util.hh"
43
#include "base/string_attr_type.hh"
44
#include "config.h"
45
#include "formats/logfmt/logfmt.parser.hh"
46
#include "log_vtab_impl.hh"
47
#include "ptimec.hh"
48
#include "scn/scan.h"
49
#include "sql_util.hh"
50
#include "yajlpp/yajlpp.hh"
51

52
class piper_log_format : public log_format {
53
public:
54
    const intern_string_t get_name() const override
14,616✔
55
    {
56
        static const intern_string_t RETVAL
57
            = intern_string::lookup("lnav_piper_log");
16,096✔
58

59
        return RETVAL;
14,616✔
60
    }
61

62
    scan_result_t scan(logfile& lf,
11,883✔
63
                       std::vector<logline>& dst,
64
                       const line_info& li,
65
                       shared_buffer_ref& sbr,
66
                       scan_batch_context& sbc) override
67
    {
68
        if (lf.has_line_metadata()
11,883✔
69
            && lf.get_text_format() == text_format_t::TF_LOG)
11,883✔
70
        {
71
            dst.emplace_back(
293✔
72
                li.li_file_range.fr_offset, li.li_timestamp, li.li_level);
293✔
73
            return scan_match{1};
293✔
74
        }
75

76
        return scan_no_match{"not a piper capture"};
11,590✔
77
    }
78

79
    static constexpr int TIMESTAMP_SIZE = 28;
80

81
    void annotate(logfile* lf,
41✔
82
                  uint64_t line_number,
83
                  string_attrs_t& sa,
84
                  logline_value_vector& values) const override
85
    {
86
        auto lr = line_range{0, TIMESTAMP_SIZE};
41✔
87
        sa.emplace_back(lr, L_TIMESTAMP.value());
41✔
88
        log_format::annotate(lf, line_number, sa, values);
41✔
89
    }
41✔
90

91
    void get_subline(const log_format_file_state& lffs,
317✔
92
                     const logline& ll,
93
                     shared_buffer_ref& sbr,
94
                     subline_options opts) override
95
    {
96
        this->plf_cached_line.resize(TIMESTAMP_SIZE);
317✔
97
        auto tlen = sql_strftime(this->plf_cached_line.data(),
317✔
98
                                 this->plf_cached_line.size(),
99
                                 ll.get_timeval(),
317✔
100
                                 'T');
101
        this->plf_cached_line.resize(tlen);
317✔
102
        {
103
            char zone_str[16];
104
            exttm tmptm;
317✔
105

106
            tmptm.et_flags |= ETF_ZONE_SET;
317✔
107
            tmptm.et_gmtoff
108
                = lnav::local_time_to_info(
634✔
109
                      date::local_seconds{ll.get_time<std::chrono::seconds>()})
317✔
110
                      .first.offset.count();
317✔
111
            off_t zone_len = 0;
317✔
112
            ftime_z(zone_str, zone_len, sizeof(zone_str), tmptm);
317✔
113
            for (off_t lpc = 0; lpc < zone_len; lpc++) {
1,902✔
114
                this->plf_cached_line.push_back(zone_str[lpc]);
1,585✔
115
            }
116
        }
117
        this->plf_cached_line.push_back(' ');
317✔
118
        const auto prefix_len = this->plf_cached_line.size();
317✔
119
        this->plf_cached_line.resize(this->plf_cached_line.size()
634✔
120
                                     + sbr.length());
317✔
121
        memcpy(
317✔
122
            &this->plf_cached_line[prefix_len], sbr.get_data(), sbr.length());
317✔
123

124
        sbr.share(this->plf_share_manager,
634✔
125
                  this->plf_cached_line.data(),
317✔
126
                  this->plf_cached_line.size());
127
    }
317✔
128

129
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
130
    {
131
        auto retval = std::make_shared<piper_log_format>(*this);
6✔
132

133
        retval->lf_specialized = true;
6✔
134
        retval->lf_timestamp_flags |= ETF_ZONE_SET | ETF_MICROS_SET;
6✔
135
        return retval;
12✔
136
    }
6✔
137

138
private:
139
    shared_buffer plf_share_manager;
140
    std::vector<char> plf_cached_line;
141
};
142

143
class generic_log_format : public log_format {
144
public:
145
    static const pcre_format* get_pcre_log_formats()
11,932✔
146
    {
147
        static const pcre_format log_fmt[] = {
148
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24}))"),
149
            pcre_format(
150
                R"(^(?:\*\*\*\s+)?(?<timestamp>(?:\s|\d{4}[\-\/]\d{2}[\-\/]\d{2}|T|\d{1,2}:\d{2}(?::\d{2}(?:[\.,]\d{1,6})?)?|Z|[+\-]\d{2}:?\d{2}|(?!DBG|DEBUG|ERR|INFO|WARN|NONE)[A-Z]{3,4})+)[:|\s]?(trc|trace|dbg|debug|info|warn(?:ing)?|err(?:or)?)[:|\s]\s*)"),
151
            pcre_format(
152
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) \[(trace|debug|info|warn(?:ing)?|error|critical)\]\s+)"),
153
            pcre_format(
154
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) -- (trace|debug|info|warn(?:ing)?|error|critical) --\s+)"),
155

156
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+/\.-]+) \[\w\s+)"),
157
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+)\s+)"),
158
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+) -\s+)"),
159
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+) -\s+)"),
160
            pcre_format(
161
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\] \[(trace|debug|info|warn(?:ing)?|error|critical)\]\s+)"),
162
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: "
163
                        "\\.,/-]+)\\[[^\\]]+\\]\\s+"),
164
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+)\s+)"),
165

166
            pcre_format(
167
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s*(\w+):?\s+)"),
168
            pcre_format(
169
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s+)"),
170
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: "
171
                        "\\.,+/-]+)\\] \\w+\\s+"),
172
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: ,+/-]+)\\] "
173
                        "\\(\\d+\\)\\s+"),
174

175
            pcre_format(),
176
        };
11,932✔
177

178
        return log_fmt;
11,932✔
179
    }
180

NEW
181
    std::string get_pattern_regex(const pattern_locks& pl,
×
182
                                  uint64_t line_number) const override
183
    {
NEW
184
        auto pat_index = pl.pattern_index_for_line(line_number);
×
185
        return get_pcre_log_formats()[pat_index].name;
×
186
    }
187

188
    const intern_string_t get_name() const override
14,241✔
189
    {
190
        static const intern_string_t RETVAL
191
            = intern_string::lookup("generic_log");
15,721✔
192

193
        return RETVAL;
14,241✔
194
    }
195

196
    scan_result_t scan(logfile& lf,
11,846✔
197
                       std::vector<logline>& dst,
198
                       const line_info& li,
199
                       shared_buffer_ref& sbr,
200
                       scan_batch_context& sbc) override
201
    {
202
        exttm log_time;
11,846✔
203
        timeval log_tv;
204
        string_fragment ts;
11,846✔
205
        std::optional<string_fragment> level;
11,846✔
206
        const char* last_pos;
207

208
        if (dst.empty()) {
11,846✔
209
            auto file_options = lf.get_file_options();
199✔
210

211
            if (file_options) {
199✔
212
                this->lf_date_time.dts_default_zone
213
                    = file_options->second.fo_default_zone.pp_value;
2✔
214
            } else {
215
                this->lf_date_time.dts_default_zone = nullptr;
197✔
216
            }
217
        }
199✔
218

219
        if ((last_pos = this->log_scanf(sbc,
23,692✔
220
                                        dst.size(),
11,846✔
221
                                        sbr.to_string_fragment(),
222
                                        get_pcre_log_formats(),
223
                                        nullptr,
224
                                        &log_time,
225
                                        &log_tv,
226

227
                                        &ts,
228
                                        &level))
229
            != nullptr)
11,846✔
230
        {
231
            auto level_val = log_level_t::LEVEL_UNKNOWN;
2,363✔
232
            if (level) {
2,363✔
233
                level_val = string2level(level->data(), level->length());
2,363✔
234
            }
235

236
            if (!((log_time.et_flags & ETF_DAY_SET)
2,363✔
237
                  && (log_time.et_flags & ETF_MONTH_SET)
2,288✔
238
                  && (log_time.et_flags & ETF_YEAR_SET)))
2,288✔
239
            {
240
                this->check_for_new_year(dst, log_time, log_tv);
684✔
241
            }
242

243
            if (!(this->lf_timestamp_flags
4,726✔
244
                  & (ETF_MILLIS_SET | ETF_MICROS_SET | ETF_NANOS_SET))
2,363✔
245
                && !dst.empty()
2,038✔
246
                && dst.back().get_time<std::chrono::seconds>().count()
2,036✔
247
                    == log_tv.tv_sec
2,036✔
248
                && dst.back()
5,261✔
249
                        .get_subsecond_time<std::chrono::microseconds>()
3,223✔
250
                        .count()
860✔
251
                    != 0)
252
            {
253
                auto log_ms
254
                    = dst.back()
×
255
                          .get_subsecond_time<std::chrono::microseconds>();
×
256

257
                log_time.et_nsec
258
                    = std::chrono::duration_cast<std::chrono::nanoseconds>(
×
259
                          log_ms)
260
                          .count();
×
261
                log_tv.tv_usec
262
                    = std::chrono::duration_cast<std::chrono::microseconds>(
×
263
                          log_ms)
264
                          .count();
×
265
            }
266

267
            auto log_us = to_us(log_tv);
2,363✔
268
            auto tid_iter = sbc.sbc_tids.insert_tid(
2,363✔
269
                sbc.sbc_allocator, string_fragment{}, log_us);
×
270
            tid_iter->second.titr_level_stats.update_msg_count(level_val);
2,363✔
271
            dst.emplace_back(li.li_file_range.fr_offset, log_us, level_val);
2,363✔
272
            return scan_match{5};
2,363✔
273
        }
274

275
        return scan_no_match{"no patterns matched"};
9,483✔
276
    }
277

278
    void annotate(logfile* lf,
86✔
279
                  uint64_t line_number,
280
                  string_attrs_t& sa,
281
                  logline_value_vector& values) const override
282
    {
283
        thread_local auto md = lnav::pcre2pp::match_data::unitialized();
86✔
284
        auto lffs = lf->get_format_file_state();
86✔
285
        auto& line = values.lvv_sbr;
86✔
286
        int pat_index
287
            = lffs.lffs_pattern_locks.pattern_index_for_line(line_number);
86✔
288
        const auto& fmt = get_pcre_log_formats()[pat_index];
86✔
289
        int prefix_len = 0;
86✔
290
        const auto line_sf = line.to_string_fragment();
86✔
291
        auto match_res = fmt.pcre->capture_from(line_sf)
86✔
292
                             .into(md)
86✔
293
                             .matches(PCRE2_NO_UTF_CHECK)
172✔
294
                             .ignore_error();
86✔
295
        if (!match_res) {
86✔
296
            return;
7✔
297
        }
298

299
        auto ts_cap = md[fmt.pf_timestamp_index].value();
79✔
300
        auto lr = to_line_range(ts_cap.trim());
79✔
301
        sa.emplace_back(lr, L_TIMESTAMP.value());
79✔
302

303
        values.lvv_values.emplace_back(TS_META, line, lr);
79✔
304
        values.lvv_values.back().lv_meta.lvm_format = (log_format*) this;
79✔
305

306
        prefix_len = md[0]->sf_end;
79✔
307
        auto level_cap = md[2];
79✔
308
        if (level_cap) {
79✔
309
            if (string2level(level_cap->data(), level_cap->length(), true)
73✔
310
                != LEVEL_UNKNOWN)
73✔
311
            {
312
                values.lvv_values.emplace_back(
73✔
313
                    LEVEL_META, line, to_line_range(level_cap->trim()));
73✔
314
                values.lvv_values.back().lv_meta.lvm_format
73✔
315
                    = (log_format*) this;
73✔
316

317
                lr = to_line_range(level_cap->trim());
73✔
318
                if (lr.lr_end != (ssize_t) line.length()) {
73✔
319
                    sa.emplace_back(lr, L_LEVEL.value());
73✔
320
                }
321
            }
322
        }
323

324
        lr.lr_start = 0;
79✔
325
        lr.lr_end = prefix_len;
79✔
326
        sa.emplace_back(lr, L_PREFIX.value());
79✔
327

328
        lr.lr_start = prefix_len;
79✔
329
        lr.lr_end = line.length();
79✔
330
        sa.emplace_back(lr, SA_BODY.value());
79✔
331

332
        log_format::annotate(lf, line_number, sa, values);
79✔
333
    }
334

335
    std::shared_ptr<log_format> specialized(int fmt_lock) override
50✔
336
    {
337
        auto retval = std::make_shared<generic_log_format>(*this);
50✔
338

339
        retval->lf_specialized = true;
50✔
340
        return retval;
100✔
341
    }
50✔
342

343
    bool hide_field(const intern_string_t field_name, bool val) override
2✔
344
    {
345
        if (field_name == TS_META.lvm_name) {
2✔
346
            TS_META.lvm_user_hidden = val;
1✔
347
            return true;
1✔
348
        }
349
        if (field_name == LEVEL_META.lvm_name) {
1✔
350
            LEVEL_META.lvm_user_hidden = val;
1✔
351
            return true;
1✔
352
        }
353
        if (field_name == OPID_META.lvm_name) {
×
354
            OPID_META.lvm_user_hidden = val;
×
355
            return true;
×
356
        }
357
        return false;
×
358
    }
359

360
    std::map<intern_string_t, logline_value_meta> get_field_states() override
52✔
361
    {
362
        return {
363
            {TS_META.lvm_name, TS_META},
364
            {LEVEL_META.lvm_name, LEVEL_META},
365
            {OPID_META.lvm_name, OPID_META},
366
        };
260✔
367
    }
52✔
368

369
private:
370
    static logline_value_meta TS_META;
371
    static logline_value_meta LEVEL_META;
372
    static logline_value_meta OPID_META;
373
};
374

375
/** Metadata for the "log_time" text field, bound to table column 2. */
logline_value_meta generic_log_format::TS_META{
    intern_string::lookup("log_time"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{2},
};

/** Metadata for the "log_level" text field, bound to table column 3. */
logline_value_meta generic_log_format::LEVEL_META{
    intern_string::lookup("log_level"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::table_column{3},
};

/** Metadata for the "log_opid" text field, declared as an internal column. */
logline_value_meta generic_log_format::OPID_META{
    intern_string::lookup("log_opid"),
    value_kind_t::VALUE_TEXT,
    logline_value_meta::internal_column{},
};
392

393
/**
 * Decode the "\xH" / "\xHH" hexadecimal escapes found in a string.
 *
 * Bytes other than a backslash are copied through unchanged.  A backslash
 * that does not start a decodable hex escape is dropped and scanning
 * continues with the byte that follows it.  As before, an escape is only
 * considered when at least two more bytes follow the 'x'.
 *
 * Parsing is done digit-by-digit so that no byte at or beyond `len` is ever
 * read (the input is not required to be NUL-terminated) and so that signs,
 * whitespace and "0x" prefixes are not accepted as part of an escape.
 *
 * @param str The bytes to decode.
 * @param len The number of bytes available in `str`.
 * @return The decoded string.
 */
std::string
from_escaped_string(const char* str, size_t len)
{
    // Value of a single hex digit, or -1 if `ch` is not one.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    std::string retval;

    retval.reserve(len);
    for (size_t lpc = 0; lpc < len; lpc++) {
        if (str[lpc] != '\\') {
            retval.push_back(str[lpc]);
            continue;
        }

        if ((lpc + 3) < len && str[lpc + 1] == 'x') {
            const int hi = hex_value(str[lpc + 2]);

            if (hi >= 0) {
                const int lo = hex_value(str[lpc + 3]);

                if (lo >= 0) {
                    retval.push_back(static_cast<char>((hi << 4) | lo));
                    lpc += 3;
                } else {
                    // Single-digit escape: consume only "xH" so the byte
                    // after the digit is not lost.
                    retval.push_back(static_cast<char>(hi));
                    lpc += 2;
                }
            }
        }
    }

    return retval;
}
×
418

419
/**
 * Find the first occurrence of the NUL-terminated string `find` within the
 * first `slen` bytes of `s`.  The search also stops at a NUL byte in `s`.
 *
 * @param s The buffer to search.
 * @param find The NUL-terminated string to look for.
 * @param slen The maximum number of bytes of `s` to examine.
 * @return A pointer to the start of the match, `s` itself if `find` is
 * empty, or std::nullopt if there is no match.
 */
std::optional<const char*>
lnav_strnstr(const char* s, const char* find, size_t slen)
{
    const char first = *find;

    if (first == '\0') {
        return s;
    }

    const char* const rest = find + 1;
    const size_t rest_len = strlen(rest);

    for (; slen >= 1 && *s != '\0'; ++s, --slen) {
        if (*s != first) {
            continue;
        }

        // Bytes still available after the candidate's first character.
        const size_t remaining = slen - 1;

        if (rest_len > remaining) {
            return std::nullopt;
        }
        if (strncmp(s + 1, rest, rest_len) == 0) {
            return s;
        }
    }

    return std::nullopt;
}
443

444
struct separated_string {
445
    const char* ss_str;
446
    size_t ss_len;
447
    const char* ss_separator;
448
    size_t ss_separator_len;
449

450
    separated_string(const char* str, size_t len)
34,122✔
451
        : ss_str(str), ss_len(len), ss_separator(","),
34,122✔
452
          ss_separator_len(strlen(this->ss_separator))
34,122✔
453
    {
454
    }
34,122✔
455

456
    separated_string& with_separator(const char* sep)
34,122✔
457
    {
458
        this->ss_separator = sep;
34,122✔
459
        this->ss_separator_len = strlen(sep);
34,122✔
460
        return *this;
34,122✔
461
    }
462

463
    struct iterator {
464
        const separated_string& i_parent;
465
        const char* i_pos;
466
        const char* i_next_pos;
467
        size_t i_index;
468

469
        iterator(const separated_string& ss, const char* pos)
822,882✔
470
            : i_parent(ss), i_pos(pos), i_next_pos(pos), i_index(0)
822,882✔
471
        {
472
            this->update();
822,882✔
473
        }
822,882✔
474

475
        void update()
1,577,674✔
476
        {
477
            const separated_string& ss = this->i_parent;
1,577,674✔
478
            auto next_field
479
                = lnav_strnstr(this->i_pos,
1,577,674✔
480
                               ss.ss_separator,
1,577,674✔
481
                               ss.ss_len - (this->i_pos - ss.ss_str));
1,577,674✔
482
            if (next_field) {
1,577,674✔
483
                this->i_next_pos = next_field.value() + ss.ss_separator_len;
721,028✔
484
            } else {
485
                this->i_next_pos = ss.ss_str + ss.ss_len;
856,646✔
486
            }
487
        }
1,577,674✔
488

489
        iterator& operator++()
754,792✔
490
        {
491
            this->i_pos = this->i_next_pos;
754,792✔
492
            this->update();
754,792✔
493
            this->i_index += 1;
754,792✔
494

495
            return *this;
754,792✔
496
        }
497

498
        string_fragment operator*()
678,174✔
499
        {
500
            const auto& ss = this->i_parent;
678,174✔
501
            int end;
502

503
            if (this->i_next_pos < (ss.ss_str + ss.ss_len)) {
678,174✔
504
                end = this->i_next_pos - ss.ss_str - ss.ss_separator_len;
648,446✔
505
            } else {
506
                end = this->i_next_pos - ss.ss_str;
29,728✔
507
            }
508
            return string_fragment::from_byte_range(
678,174✔
509
                ss.ss_str, this->i_pos - ss.ss_str, end);
678,174✔
510
        }
511

512
        bool operator==(const iterator& other) const
788,760✔
513
        {
514
            return (&this->i_parent == &other.i_parent)
788,760✔
515
                && (this->i_pos == other.i_pos);
788,760✔
516
        }
517

518
        bool operator!=(const iterator& other) const
788,606✔
519
        {
520
            return !(*this == other);
788,606✔
521
        }
522

523
        size_t index() const { return this->i_index; }
1,626,382✔
524
    };
525

526
    iterator begin() { return {*this, this->ss_str}; }
34,122✔
527

528
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
788,760✔
529
};
530

531
class bro_log_format : public log_format {
532
public:
533
    static const intern_string_t TS;
534
    static const intern_string_t DURATION;
535
    struct field_def {
536
        logline_value_meta fd_meta;
537
        logline_value_meta* fd_root_meta;
538
        std::string fd_collator;
539
        std::optional<size_t> fd_numeric_index;
540

541
        explicit field_def(const intern_string_t name,
622✔
542
                           size_t col,
543
                           log_format* format)
544
            : fd_meta(name,
1,244✔
545
                      value_kind_t::VALUE_TEXT,
546
                      logline_value_meta::table_column{col},
622✔
547
                      format),
548
              fd_root_meta(&FIELD_META.find(name)->second)
622✔
549
        {
550
        }
622✔
551

552
        field_def& with_kind(value_kind_t kind,
458✔
553
                             bool identifier = false,
554
                             bool foreign_key = false,
555
                             const std::string& collator = "")
556
        {
557
            this->fd_meta.lvm_kind = kind;
458✔
558
            this->fd_meta.lvm_identifier = identifier;
458✔
559
            this->fd_meta.lvm_foreign_key = foreign_key;
458✔
560
            this->fd_collator = collator;
458✔
561
            return *this;
458✔
562
        }
563

564
        field_def& with_numeric_index(size_t index)
116✔
565
        {
566
            this->fd_numeric_index = index;
116✔
567
            return *this;
116✔
568
        }
569
    };
570

571
    static std::unordered_map<const intern_string_t, logline_value_meta>
572
        FIELD_META;
573

574
    /** @return The interned key "std" used for the opid description entry. */
    static const intern_string_t get_opid_desc()
    {
        static const intern_string_t RETVAL(intern_string::lookup("std"));

        return RETVAL;
    }
580

581
    bro_log_format()
746✔
582
    {
746✔
583
        this->lf_structured = true;
746✔
584
        this->lf_is_self_describing = true;
746✔
585
        this->lf_time_ordered = false;
746✔
586
        this->lf_timestamp_point_of_reference
587
            = timestamp_point_of_reference_t::start;
746✔
588

589
        auto desc_v = std::make_shared<std::vector<opid_descriptor>>();
746✔
590
        desc_v->emplace({});
746✔
591
        auto emplace_res = this->lf_opid_description_def->emplace(
1,492✔
592
            get_opid_desc(), opid_descriptors{desc_v, 0});
1,492✔
593
        this->lf_opid_description_def_vec->emplace_back(
746✔
594
            &emplace_res.first->second);
746✔
595
    }
746✔
596

597
    /**
     * @return The name stored in blf_format_name, or "bro" while that name
     * is still empty.
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t name(intern_string::lookup("bro"));

        if (this->blf_format_name.empty()) {
            return name;
        }
        return this->blf_format_name;
    }
603

604
    void clear() override
11,905✔
605
    {
606
        this->log_format::clear();
11,905✔
607
        this->blf_format_name.clear();
11,905✔
608
        this->blf_field_defs.clear();
11,905✔
609
    }
11,905✔
610

611
    /**
     * Scan one data line using the field definitions in blf_field_defs.
     *
     * The line is split on blf_separator.  The timestamp, status code, uid,
     * originating host and duration fields are picked out by name, and
     * fields with a numeric index feed the batch's value statistics.
     *
     * @param dst The messages recognized so far; a new logline is appended
     * when a timestamp was parsed.  If this format is not yet specialized,
     * the existing entries are first marked as ignored.
     * @param li Metadata for the line; supplies the file offset.
     * @param sbr The line content.
     * @param sbc Batch context receiving value statistics and opid data.
     * @return scan_match{2000} for a "#close" line or a line with a
     * parseable timestamp, otherwise a scan_no_match.
     */
    scan_result_t scan_int(std::vector<logline>& dst,
                           const line_info& li,
                           shared_buffer_ref& sbr,
                           scan_batch_context& sbc)
    {
        static const intern_string_t STATUS_CODE
            = intern_string::lookup("bro_status_code");
        static const intern_string_t UID = intern_string::lookup("bro_uid");
        static const intern_string_t ID_ORIG_H
            = intern_string::lookup("bro_id_orig_h");

        separated_string ss(sbr.get_data(), sbr.length());
        timeval tv;
        exttm tm;
        auto found_ts = false;
        log_level_t level = LEVEL_INFO;
        uint16_t opid = 0;
        auto opid_cap = string_fragment::invalid();
        auto host_cap = string_fragment::invalid();
        auto duration = std::chrono::microseconds{0};

        sbc.sbc_value_stats.resize(this->blf_field_defs.size());
        ss.with_separator(this->blf_separator.get());

        // The end iterator does not change while iterating, so build it
        // once instead of on every comparison.
        const auto fields_end = ss.end();
        for (auto iter = ss.begin(); iter != fields_end; ++iter) {
            if (iter.index() == 0 && *iter == "#close") {
                return scan_match{2000};
            }

            // Ignore any fields beyond the ones that were declared.
            if (iter.index() >= this->blf_field_defs.size()) {
                break;
            }

            const auto& fd = this->blf_field_defs[iter.index()];

            if (TS == fd.fd_meta.lvm_name) {
                static const char* const TIME_FMT[] = {"%s.%f"};
                const auto sf = *iter;

                if (this->lf_date_time.scan(
                        sf.data(), sf.length(), TIME_FMT, &tm, tv))
                {
                    this->lf_timestamp_flags = tm.et_flags;
                    found_ts = true;
                }
            } else if (STATUS_CODE == fd.fd_meta.lvm_name) {
                const auto sf = *iter;

                // A status code whose first character is '4' or above is
                // reported at error level.
                if (!sf.empty() && sf[0] >= '4') {
                    level = LEVEL_ERROR;
                }
            } else if (UID == fd.fd_meta.lvm_name) {
                opid_cap = *iter;

                opid = opid_cap.hash();
            } else if (ID_ORIG_H == fd.fd_meta.lvm_name) {
                host_cap = *iter;
            } else if (DURATION == fd.fd_meta.lvm_name) {
                const auto sf = *iter;
                // Parse the field text itself as a double.  The value is
                // scaled by 1,000,000 to obtain microseconds.
                auto scan_res = scn::scan_value<double>(sf.to_string_view());
                if (scan_res) {
                    duration = std::chrono::microseconds{
                        static_cast<long long>(scan_res->value() * 1000000)};
                }
            }

            if (fd.fd_numeric_index) {
                switch (fd.fd_meta.lvm_kind) {
                    case value_kind_t::VALUE_INTEGER:
                    case value_kind_t::VALUE_FLOAT: {
                        const auto sv = (*iter).to_string_view();
                        auto scan_float_res = scn::scan_value<double>(sv);
                        if (scan_float_res) {
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
                                .add_value(scan_float_res->value());
                        }
                        break;
                    }
                    default:
                        break;
                }
            }
        }

        if (found_ts) {
            if (!this->lf_specialized) {
                for (auto& ll : dst) {
                    ll.set_ignore(true);
                }
            }

            auto log_us = to_us(tv);
            if (opid_cap.is_valid()) {
                auto opid_iter = sbc.sbc_opids.insert_op(
                    sbc.sbc_allocator,
                    opid_cap,
                    log_us,
                    this->lf_timestamp_point_of_reference,
                    duration);
                opid_iter->second.otr_level_stats.update_msg_count(level);

                // Use the originating host as the operation's description
                // if it does not have one yet.
                auto& otr = opid_iter->second;
                if (!otr.otr_description.lod_index && host_cap.is_valid()
                    && otr.otr_description.lod_elements.empty())
                {
                    otr.otr_description.lod_index = 0;
                    otr.otr_description.lod_elements.insert(
                        0, host_cap.to_string());
                }
            }
            dst.emplace_back(
                li.li_file_range.fr_offset, log_us, level, opid);
            dst.back().set_opid(opid);
            return scan_match{2000};
        }
        return scan_no_match{"no header found"};
    }
728

729
    scan_result_t scan(logfile& lf,
11,883✔
730
                       std::vector<logline>& dst,
731
                       const line_info& li,
732
                       shared_buffer_ref& sbr,
733
                       scan_batch_context& sbc) override
734
    {
735
        static const auto SEP_RE
736
            = lnav::pcre2pp::code::from_const(R"(^#separator\s+(.+))");
11,883✔
737

738
        if (dst.empty()) {
11,883✔
739
            auto file_options = lf.get_file_options();
1,140✔
740

741
            if (file_options) {
1,140✔
742
                this->lf_date_time.dts_default_zone
743
                    = file_options->second.fo_default_zone.pp_value;
57✔
744
            } else {
745
                this->lf_date_time.dts_default_zone = nullptr;
1,083✔
746
            }
747
        }
1,140✔
748

749
        if (!this->blf_format_name.empty()) {
11,883✔
750
            return this->scan_int(dst, li, sbr, sbc);
4,146✔
751
        }
752

753
        if (dst.empty() || dst.size() > 20 || sbr.empty()
14,334✔
754
            || sbr.get_data()[0] == '#')
14,334✔
755
        {
756
            return scan_no_match{"no header found"};
5,174✔
757
        }
758

759
        auto line_iter = dst.begin();
2,563✔
760
        auto read_result = lf.read_line(line_iter);
2,563✔
761

762
        if (read_result.isErr()) {
2,563✔
763
            return scan_no_match{"unable to read first line"};
×
764
        }
765

766
        auto line = read_result.unwrap();
2,563✔
767
        auto md = SEP_RE.create_match_data();
2,563✔
768

769
        auto match_res = SEP_RE.capture_from(line.to_string_fragment())
2,563✔
770
                             .into(md)
2,563✔
771
                             .matches(PCRE2_NO_UTF_CHECK)
5,126✔
772
                             .ignore_error();
2,563✔
773
        if (!match_res) {
2,563✔
774
            return scan_no_match{"cannot read separator header"};
2,541✔
775
        }
776

777
        this->clear();
22✔
778

779
        auto sep = from_escaped_string(md[1]->data(), md[1]->length());
22✔
780
        this->blf_separator = intern_string::lookup(sep);
22✔
781

782
        for (++line_iter; line_iter != dst.end(); ++line_iter) {
176✔
783
            auto next_read_result = lf.read_line(line_iter);
154✔
784

785
            if (next_read_result.isErr()) {
154✔
786
                return scan_no_match{"unable to read header line"};
×
787
            }
788

789
            line = next_read_result.unwrap();
154✔
790
            separated_string ss(line.get_data(), line.length());
154✔
791

792
            ss.with_separator(this->blf_separator.get());
154✔
793
            auto iter = ss.begin();
154✔
794

795
            string_fragment directive = *iter;
154✔
796

797
            if (directive.empty() || directive[0] != '#') {
154✔
798
                continue;
×
799
            }
800

801
            ++iter;
154✔
802
            if (iter == ss.end()) {
154✔
803
                continue;
×
804
            }
805

806
            if (directive == "#set_separator") {
154✔
807
                this->blf_set_separator = intern_string::lookup(*iter);
22✔
808
            } else if (directive == "#empty_field") {
132✔
809
                this->blf_empty_field = intern_string::lookup(*iter);
22✔
810
            } else if (directive == "#unset_field") {
110✔
811
                this->blf_unset_field = intern_string::lookup(*iter);
22✔
812
            } else if (directive == "#path") {
88✔
813
                auto full_name = fmt::format(FMT_STRING("bro_{}_log"), *iter);
66✔
814
                this->blf_format_name = intern_string::lookup(full_name);
22✔
815
            } else if (directive == "#fields" && this->blf_field_defs.empty()) {
88✔
816
                do {
817
                    auto field_name
818
                        = intern_string::lookup("bro_" + sql_safe_ident(*iter));
622✔
819
                    auto common_iter = FIELD_META.find(field_name);
622✔
820
                    if (common_iter == FIELD_META.end()) {
622✔
821
                        FIELD_META.emplace(field_name,
616✔
822
                                           logline_value_meta{
1,232✔
823
                                               field_name,
824
                                               value_kind_t::VALUE_TEXT,
825
                                           });
826
                    }
827
                    this->blf_field_defs.emplace_back(
1,244✔
828
                        field_name, this->blf_field_defs.size(), this);
622✔
829
                    ++iter;
622✔
830
                } while (iter != ss.end());
622✔
831
            } else if (directive == "#types") {
44✔
832
                static const char* KNOWN_IDS[] = {
833
                    "bro_conn_uids",
834
                    "bro_fuid",
835
                    "bro_host",
836
                    "bro_info_code",
837
                    "bro_method",
838
                    "bro_mime_type",
839
                    "bro_orig_fuids",
840
                    "bro_parent_fuid",
841
                    "bro_proto",
842
                    "bro_referrer",
843
                    "bro_resp_fuids",
844
                    "bro_service",
845
                    "bro_uid",
846
                    "bro_uri",
847
                    "bro_user_agent",
848
                    "bro_username",
849
                };
850
                static const char* KNOWN_FOREIGN[] = {
851
                    "bro_status_code",
852
                };
853

854
                int numeric_count = 0;
22✔
855

856
                do {
857
                    string_fragment field_type = *iter;
622✔
858
                    auto& fd = this->blf_field_defs[iter.index() - 1];
622✔
859

860
                    if (field_type == "time") {
622✔
861
                        fd.with_kind(value_kind_t::VALUE_TIMESTAMP);
44✔
862
                    } else if (field_type == "string") {
600✔
863
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
456✔
864
                                                        std::end(KNOWN_IDS),
865
                                                        fd.fd_meta.lvm_name);
228✔
866
                        fd.with_kind(value_kind_t::VALUE_TEXT, ident);
456✔
867
                    } else if (field_type == "count") {
372✔
868
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
228✔
869
                                                        std::end(KNOWN_IDS),
870
                                                        fd.fd_meta.lvm_name);
114✔
871
                        bool foreign
872
                            = std::binary_search(std::begin(KNOWN_FOREIGN),
228✔
873
                                                 std::end(KNOWN_FOREIGN),
874
                                                 fd.fd_meta.lvm_name);
114✔
875
                        fd.with_kind(
228✔
876
                              value_kind_t::VALUE_INTEGER, ident, foreign)
877
                            .with_numeric_index(numeric_count);
114✔
878
                        numeric_count += 1;
114✔
879
                    } else if (field_type == "bool") {
258✔
880
                        fd.with_kind(value_kind_t::VALUE_BOOLEAN);
8✔
881
                    } else if (field_type == "addr") {
254✔
882
                        fd.with_kind(
88✔
883
                            value_kind_t::VALUE_TEXT, true, false, "ipaddress");
884
                    } else if (field_type == "port") {
210✔
885
                        fd.with_kind(value_kind_t::VALUE_INTEGER, true);
88✔
886
                    } else if (field_type == "interval") {
166✔
887
                        fd.with_kind(value_kind_t::VALUE_FLOAT)
4✔
888
                            .with_numeric_index(numeric_count);
2✔
889
                        numeric_count += 1;
2✔
890
                    }
891

892
                    ++iter;
622✔
893
                } while (iter != ss.end());
622✔
894
            }
895
        }
154✔
896

897
        if (!this->blf_format_name.empty() && !this->blf_separator.empty()
44✔
898
            && !this->blf_field_defs.empty())
44✔
899
        {
900
            return this->scan_int(dst, li, sbr, sbc);
22✔
901
        }
902

903
        this->blf_format_name.clear();
×
904

UNCOV
905
        return scan_no_match{"no header found"};
×
906
    }
2,563✔
907

908
    void annotate(logfile* lf,
29,800✔
909
                  uint64_t line_number,
910
                  string_attrs_t& sa,
911
                  logline_value_vector& values) const override
912
    {
913
        static const intern_string_t UID = intern_string::lookup("bro_uid");
29,838✔
914

915
        auto& sbr = values.lvv_sbr;
29,800✔
916
        separated_string ss(sbr.get_data(), sbr.length());
29,800✔
917

918
        ss.with_separator(this->blf_separator.get());
29,800✔
919

920
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
664,448✔
921
            if (iter.index() >= this->blf_field_defs.size()) {
634,852✔
922
                return;
204✔
923
            }
924

925
            const field_def& fd = this->blf_field_defs[iter.index()];
634,648✔
926
            string_fragment sf = *iter;
634,648✔
927

928
            if (sf == this->blf_empty_field) {
634,648✔
929
                sf.clear();
29,603✔
930
            } else if (sf == this->blf_unset_field) {
605,045✔
931
                sf.invalidate();
69,070✔
932
            }
933

934
            auto lr = line_range(sf.sf_begin, sf.sf_end);
634,648✔
935

936
            if (fd.fd_meta.lvm_name == TS) {
634,648✔
937
                sa.emplace_back(lr, L_TIMESTAMP.value());
29,800✔
938
            } else if (fd.fd_meta.lvm_name == UID) {
604,848✔
939
                sa.emplace_back(lr, L_OPID.value());
29,800✔
940
                values.lvv_opid_value = sf.to_string();
29,800✔
941
                values.lvv_opid_provenance
942
                    = logline_value_vector::opid_provenance::file;
29,800✔
943
            }
944

945
            if (lr.is_valid()) {
634,648✔
946
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
565,578✔
947
            } else {
948
                values.lvv_values.emplace_back(fd.fd_meta);
69,070✔
949
            }
950
            values.lvv_values.back().lv_meta.lvm_user_hidden
634,648✔
951
                = fd.fd_root_meta->lvm_user_hidden;
634,648✔
952
        }
953

954
        log_format::annotate(lf, line_number, sa, values);
29,596✔
955
    }
956

957
    std::optional<size_t> stats_index_for_value(
35✔
958
        const intern_string_t& name) const override
959
    {
960
        for (const auto& blf_field_def : this->blf_field_defs) {
525✔
961
            if (blf_field_def.fd_meta.lvm_name == name) {
525✔
962
                if (!blf_field_def.fd_numeric_index) {
35✔
963
                    break;
×
964
                }
965
                return blf_field_def.fd_numeric_index.value();
35✔
966
            }
967
        }
968

NEW
969
        return std::nullopt;
×
970
    }
971

972
    /**
     * Set the user-hidden flag of a field in the shared FIELD_META table.
     *
     * LOG_TIME_STR is treated as an alias for the TS field.
     *
     * @return true if the field exists in FIELD_META and was updated,
     *         false otherwise.
     */
    bool hide_field(intern_string_t field_name, bool val) override
    {
        const intern_string_t lookup_name
            = (field_name == LOG_TIME_STR) ? TS : field_name;

        auto meta_iter = FIELD_META.find(lookup_name);
        if (meta_iter != FIELD_META.end()) {
            meta_iter->second.lvm_user_hidden = val;
            return true;
        }

        return false;
    }
987

988
    std::map<intern_string_t, logline_value_meta> get_field_states() override
52✔
989
    {
990
        std::map<intern_string_t, logline_value_meta> retval;
52✔
991

992
        for (const auto& fd : FIELD_META) {
168✔
993
            retval.emplace(fd.first, fd.second);
116✔
994
        }
995

996
        return retval;
52✔
997
    }
×
998

999
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
22✔
1000
    {
1001
        auto retval = std::make_shared<bro_log_format>(*this);
22✔
1002

1003
        retval->lf_specialized = true;
22✔
1004
        return retval;
44✔
1005
    }
22✔
1006

1007
    class bro_log_table : public log_format_vtab_impl {
1008
    public:
1009
        explicit bro_log_table(const bro_log_format& format)
20✔
1010
            : log_format_vtab_impl(format), blt_format(format)
20✔
1011
        {
1012
        }
20✔
1013

1014
        void get_columns(std::vector<vtab_column>& cols) const override
29✔
1015
        {
1016
            for (const auto& fd : this->blt_format.blf_field_defs) {
854✔
1017
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
825✔
1018
                    fd.fd_meta.lvm_kind);
825✔
1019

1020
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
825✔
1021
                                  type_pair.first,
1022
                                  fd.fd_collator,
825✔
1023
                                  false,
1,650✔
1024
                                  "",
1025
                                  type_pair.second);
1026
            }
1027
        }
29✔
1028

1029
        void get_foreign_keys(
10✔
1030
            std::unordered_set<std::string>& keys_inout) const override
1031
        {
1032
            this->log_vtab_impl::get_foreign_keys(keys_inout);
10✔
1033

1034
            for (const auto& fd : this->blt_format.blf_field_defs) {
292✔
1035
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
282✔
1036
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
123✔
1037
                }
1038
            }
1039
        }
10✔
1040

1041
        const bro_log_format& blt_format;
1042
    };
1043

1044
    /**
     * Access the function-local registry of tables, keyed by format name.
     */
    static std::map<intern_string_t, std::shared_ptr<bro_log_table>>&
    get_tables()
    {
        static std::map<intern_string_t, std::shared_ptr<bro_log_table>>
            tables_by_name;

        return tables_by_name;
    }
1051

1052
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
646✔
1053
    {
1054
        if (this->blf_format_name.empty()) {
646✔
1055
            return nullptr;
626✔
1056
        }
1057

1058
        std::shared_ptr<bro_log_table> retval = nullptr;
20✔
1059

1060
        auto& tables = get_tables();
20✔
1061
        const auto iter = tables.find(this->blf_format_name);
20✔
1062
        if (iter == tables.end()) {
20✔
1063
            retval = std::make_shared<bro_log_table>(*this);
20✔
1064
            tables[this->blf_format_name] = retval;
20✔
1065
        }
1066

1067
        return retval;
20✔
1068
    }
20✔
1069

1070
    // Intentionally empty override: none of the arguments are read and `sbr`
    // is left untouched.
    void get_subline(const log_format_file_state& lffs,
33,808✔
1071
                     const logline& ll,
1072
                     shared_buffer_ref& sbr,
1073
                     subline_options opts) override
1074
    {
1075
    }
33,808✔
1076

1077
    // "bro_<path>_log", built from the "#path" header; empty until a header
    // has been parsed by scan().
    intern_string_t blf_format_name;
1078
    // Column separator taken from the "#separator" header line.
    intern_string_t blf_separator;
1079
    // Value of the "#set_separator" header.
    intern_string_t blf_set_separator;
1080
    // Placeholder from "#empty_field"; annotate() clears matching cells.
    intern_string_t blf_empty_field;
1081
    // Placeholder from "#unset_field"; annotate() invalidates matching cells.
    intern_string_t blf_unset_field;
1082
    // One entry per name in the "#fields" header, in column order.
    std::vector<field_def> blf_field_defs;
1083
};
1084

1085
// Per-field metadata shared by all bro_log_format instances; scan() adds an
// entry for each new "#fields" name and hide_field() updates the hidden flag.
std::unordered_map<const intern_string_t, logline_value_meta>
1086
    bro_log_format::FIELD_META;
1087

1088
// Name of the field that annotate() tags as the timestamp.
const intern_string_t bro_log_format::TS = intern_string::lookup("bro_ts");
1089
// Interned "bro_duration" field name (its uses are outside this section).
const intern_string_t bro_log_format::DURATION
1090
    = intern_string::lookup("bro_duration");
1091

1092
struct ws_separated_string {
1093
    const char* ss_str;
1094
    size_t ss_len;
1095

1096
    explicit ws_separated_string(const char* str = nullptr, size_t len = -1)
22,126✔
1097
        : ss_str(str), ss_len(len)
22,126✔
1098
    {
1099
    }
22,126✔
1100

1101
    struct iterator {
1102
        enum class state_t {
1103
            NORMAL,
1104
            QUOTED,
1105
        };
1106

1107
        const ws_separated_string& i_parent;
1108
        const char* i_pos;
1109
        const char* i_next_pos;
1110
        size_t i_index{0};
1111
        state_t i_state{state_t::NORMAL};
1112

1113
        iterator(const ws_separated_string& ss, const char* pos)
70,776✔
1114
            : i_parent(ss), i_pos(pos), i_next_pos(pos)
70,776✔
1115
        {
1116
            this->update();
70,776✔
1117
        }
70,776✔
1118

1119
        void update()
116,645✔
1120
        {
1121
            const auto& ss = this->i_parent;
116,645✔
1122
            bool done = false;
116,645✔
1123

1124
            while (!done && this->i_next_pos < (ss.ss_str + ss.ss_len)) {
852,641✔
1125
                switch (this->i_state) {
735,996✔
1126
                    case state_t::NORMAL:
729,238✔
1127
                        if (*this->i_next_pos == '"') {
729,238✔
1128
                            this->i_state = state_t::QUOTED;
255✔
1129
                        } else if (isspace(*this->i_next_pos)) {
728,983✔
1130
                            done = true;
59,513✔
1131
                        }
1132
                        break;
729,238✔
1133
                    case state_t::QUOTED:
6,758✔
1134
                        if (*this->i_next_pos == '"') {
6,758✔
1135
                            this->i_state = state_t::NORMAL;
255✔
1136
                        }
1137
                        break;
6,758✔
1138
                }
1139
                if (!done) {
735,996✔
1140
                    this->i_next_pos += 1;
676,483✔
1141
                }
1142
            }
1143
        }
116,645✔
1144

1145
        iterator& operator++()
45,869✔
1146
        {
1147
            const auto& ss = this->i_parent;
45,869✔
1148

1149
            this->i_pos = this->i_next_pos;
45,869✔
1150
            while (this->i_pos < (ss.ss_str + ss.ss_len)
45,869✔
1151
                   && isspace(*this->i_pos))
89,119✔
1152
            {
1153
                this->i_pos += 1;
43,250✔
1154
                this->i_next_pos += 1;
43,250✔
1155
            }
1156
            this->update();
45,869✔
1157
            this->i_index += 1;
45,869✔
1158

1159
            return *this;
45,869✔
1160
        }
1161

1162
        string_fragment operator*()
62,838✔
1163
        {
1164
            const auto& ss = this->i_parent;
62,838✔
1165
            int end = this->i_next_pos - ss.ss_str;
62,838✔
1166

1167
            return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end);
62,838✔
1168
        }
1169

1170
        bool operator==(const iterator& other) const
48,650✔
1171
        {
1172
            return (&this->i_parent == &other.i_parent)
48,650✔
1173
                && (this->i_pos == other.i_pos);
48,650✔
1174
        }
1175

1176
        bool operator!=(const iterator& other) const
46,065✔
1177
        {
1178
            return !(*this == other);
46,065✔
1179
        }
1180

1181
        size_t index() const { return this->i_index; }
86,567✔
1182
    };
1183

1184
    iterator begin() { return {*this, this->ss_str}; }
22,126✔
1185

1186
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
48,650✔
1187
};
1188

1189
class w3c_log_format : public log_format {
1190
public:
1191
    // Field name that scan_int() treats as the date column.
    static const intern_string_t F_DATE;
1192
    // Field name that scan_int() treats as the time column.
    static const intern_string_t F_TIME;
1193

1194
    struct field_def {
1195
        const intern_string_t fd_name;
1196
        logline_value_meta fd_meta;
1197
        logline_value_meta* fd_root_meta{nullptr};
1198
        std::string fd_collator;
1199
        std::optional<size_t> fd_numeric_index;
1200

1201
        explicit field_def(const intern_string_t name)
22✔
1202
            : fd_name(name), fd_meta(intern_string::lookup(sql_safe_ident(
44✔
1203
                                         name.to_string_fragment())),
44✔
1204
                                     value_kind_t::VALUE_TEXT)
22✔
1205
        {
1206
        }
22✔
1207

1208
        field_def(const intern_string_t name, logline_value_meta meta)
71✔
1209
            : fd_name(name), fd_meta(meta)
71✔
1210
        {
1211
        }
71✔
1212

1213
        field_def(size_t col,
9,744✔
1214
                  const char* name,
1215
                  value_kind_t kind,
1216
                  bool ident = false,
1217
                  bool foreign_key = false,
1218
                  std::string coll = "")
1219
            : fd_name(intern_string::lookup(name)),
19,488✔
1220
              fd_meta(
19,488✔
1221
                  intern_string::lookup(sql_safe_ident(string_fragment(name))),
19,488✔
1222
                  kind,
1223
                  logline_value_meta::table_column{col}),
9,744✔
1224
              fd_collator(std::move(coll))
9,744✔
1225
        {
1226
            this->fd_meta.lvm_identifier = ident;
9,744✔
1227
            this->fd_meta.lvm_foreign_key = foreign_key;
9,744✔
1228
        }
9,744✔
1229

1230
        field_def& with_kind(value_kind_t kind,
1231
                             bool identifier = false,
1232
                             const std::string& collator = "")
1233
        {
1234
            this->fd_meta.lvm_kind = kind;
1235
            this->fd_meta.lvm_identifier = identifier;
1236
            this->fd_collator = collator;
1237
            return *this;
1238
        }
1239

1240
        field_def& with_numeric_index(int index)
51✔
1241
        {
1242
            this->fd_numeric_index = index;
51✔
1243
            return *this;
51✔
1244
        }
1245
    };
1246

1247
    // Metadata table keyed by field name, shared by all instances.
    // NOTE(review): presumably plays the same role as
    // bro_log_format::FIELD_META; its uses are outside this section.
    static std::unordered_map<const intern_string_t, logline_value_meta>
1248
        FIELD_META;
1249

1250
    struct field_to_struct_t {
1251
        field_to_struct_t(const char* prefix, const char* struct_name)
2,436✔
1252
            : fs_prefix(prefix),
2,436✔
1253
              fs_struct_name(intern_string::lookup(struct_name))
4,872✔
1254
        {
1255
        }
2,436✔
1256

1257
        const char* fs_prefix;
1258
        intern_string_t fs_struct_name;
1259
    };
1260

1261
    static const std::array<field_def, 16>& get_known_fields()
626✔
1262
    {
1263
        static size_t KNOWN_FIELD_INDEX = 0;
1264
        static const std::array<field_def, 16> RETVAL = {
1265
            field_def{
1266
                KNOWN_FIELD_INDEX++,
1267
                "cs-method",
1268
                value_kind_t::VALUE_TEXT,
1269
                true,
1270
            },
1271
            {
1272
                KNOWN_FIELD_INDEX++,
1273
                "c-ip",
1274
                value_kind_t::VALUE_TEXT,
1275
                true,
1276
                false,
1277
                "ipaddress",
1278
            },
1279
            {
1280
                KNOWN_FIELD_INDEX++,
1281
                "cs-bytes",
1282
                value_kind_t::VALUE_INTEGER,
1283
                false,
1284
            },
1285
            {
1286
                KNOWN_FIELD_INDEX++,
1287
                "cs-host",
1288
                value_kind_t::VALUE_TEXT,
1289
                true,
1290
            },
1291
            {
1292
                KNOWN_FIELD_INDEX++,
1293
                "cs-uri-stem",
1294
                value_kind_t::VALUE_TEXT,
1295
                true,
1296
                false,
1297
                "naturalnocase",
1298
            },
1299
            {
1300
                KNOWN_FIELD_INDEX++,
1301
                "cs-uri-query",
1302
                value_kind_t::VALUE_TEXT,
1303
                false,
1304
            },
1305
            {
1306
                KNOWN_FIELD_INDEX++,
1307
                "cs-username",
1308
                value_kind_t::VALUE_TEXT,
1309
                false,
1310
            },
1311
            {
1312
                KNOWN_FIELD_INDEX++,
1313
                "cs-version",
1314
                value_kind_t::VALUE_TEXT,
1315
                true,
1316
            },
1317
            {
1318
                KNOWN_FIELD_INDEX++,
1319
                "s-ip",
1320
                value_kind_t::VALUE_TEXT,
1321
                true,
1322
                false,
1323
                "ipaddress",
1324
            },
1325
            {
1326
                KNOWN_FIELD_INDEX++,
1327
                "s-port",
1328
                value_kind_t::VALUE_INTEGER,
1329
                true,
1330
            },
1331
            {
1332
                KNOWN_FIELD_INDEX++,
1333
                "s-computername",
1334
                value_kind_t::VALUE_TEXT,
1335
                true,
1336
            },
1337
            {
1338
                KNOWN_FIELD_INDEX++,
1339
                "s-sitename",
1340
                value_kind_t::VALUE_TEXT,
1341
                true,
1342
            },
1343
            {
1344
                KNOWN_FIELD_INDEX++,
1345
                "sc-bytes",
1346
                value_kind_t::VALUE_INTEGER,
1347
                false,
1348
            },
1349
            {
1350
                KNOWN_FIELD_INDEX++,
1351
                "sc-status",
1352
                value_kind_t::VALUE_INTEGER,
1353
                false,
1354
                true,
1355
            },
1356
            {
1357
                KNOWN_FIELD_INDEX++,
1358
                "sc-substatus",
1359
                value_kind_t::VALUE_INTEGER,
1360
                false,
1361
            },
1362
            {
1363
                KNOWN_FIELD_INDEX++,
1364
                "time-taken",
1365
                value_kind_t::VALUE_FLOAT,
1366
                false,
1367
            },
1368
        };
1,844✔
1369

1370
        return RETVAL;
626✔
1371
    }
1372

1373
    static const std::array<field_to_struct_t, 4>& get_known_struct_fields()
623✔
1374
    {
1375
        static const std::array<field_to_struct_t, 4> RETVAL = {
1376
            field_to_struct_t{"cs(", "cs_headers"},
1377
            {"sc(", "sc_headers"},
1378
            {"rs(", "rs_headers"},
1379
            {"sr(", "sr_headers"},
1380
        };
623✔
1381

1382
        return RETVAL;
623✔
1383
    }
1384

1385
    w3c_log_format()
746✔
1386
    {
746✔
1387
        this->lf_is_self_describing = true;
746✔
1388
        this->lf_time_ordered = false;
746✔
1389
        this->lf_structured = true;
746✔
1390
    }
746✔
1391

1392
    /**
     * @return wlf_format_name when it is set, otherwise the generic
     *         "w3c_log" name.
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t DEFAULT_NAME(
            intern_string::lookup("w3c_log"));

        if (!this->wlf_format_name.empty()) {
            return this->wlf_format_name;
        }

        return DEFAULT_NAME;
    }
1398

1399
    void clear() override
14,616✔
1400
    {
1401
        this->log_format::clear();
14,616✔
1402
        this->wlf_time_scanner.clear();
14,616✔
1403
        this->wlf_format_name.clear();
14,616✔
1404
        this->wlf_field_defs.clear();
14,616✔
1405
    }
14,616✔
1406

1407
    scan_result_t scan_int(std::vector<logline>& dst,
1,315✔
1408
                           const line_info& li,
1409
                           shared_buffer_ref& sbr,
1410
                           scan_batch_context& sbc)
1411
    {
1412
        static const intern_string_t F_DATE_LOCAL
1413
            = intern_string::lookup("date-local");
1,349✔
1414
        static const intern_string_t F_DATE_UTC
1415
            = intern_string::lookup("date-UTC");
1,349✔
1416
        static const intern_string_t F_TIME_LOCAL
1417
            = intern_string::lookup("time-local");
1,349✔
1418
        static const intern_string_t F_TIME_UTC
1419
            = intern_string::lookup("time-UTC");
1,349✔
1420
        static const intern_string_t F_STATUS_CODE
1421
            = intern_string::lookup("sc-status");
1,349✔
1422

1423
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,315✔
1424
        timeval date_tv{0, 0}, time_tv{0, 0};
1,315✔
1425
        exttm date_tm, time_tm;
1,315✔
1426
        bool found_date = false, found_time = false;
1,315✔
1427
        log_level_t level = LEVEL_INFO;
1,315✔
1428

1429
        sbc.sbc_value_stats.resize(this->wlf_field_defs.size());
1,315✔
1430
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
19,693✔
1431
            if (iter.index() >= this->wlf_field_defs.size()) {
18,588✔
1432
                level = LEVEL_INVALID;
1✔
1433
                break;
1✔
1434
            }
1435

1436
            const auto& fd = this->wlf_field_defs[iter.index()];
18,587✔
1437
            string_fragment sf = *iter;
18,587✔
1438

1439
            if (sf.startswith("#")) {
18,587✔
1440
                if (sf == "#Date:") {
209✔
1441
                    auto sbr_sf_opt
1442
                        = sbr.to_string_fragment().consume_n(sf.length());
53✔
1443

1444
                    if (sbr_sf_opt) {
53✔
1445
                        auto sbr_sf = sbr_sf_opt.value().trim();
53✔
1446
                        date_time_scanner dts;
53✔
1447
                        exttm tm;
53✔
1448
                        timeval tv;
1449

1450
                        if (dts.scan(sbr_sf.data(),
53✔
1451
                                     sbr_sf.length(),
53✔
1452
                                     nullptr,
1453
                                     &tm,
1454
                                     tv))
1455
                        {
1456
                            this->lf_date_time.set_base_time(tv.tv_sec,
52✔
1457
                                                             tm.et_tm);
1458
                            this->wlf_time_scanner.set_base_time(tv.tv_sec,
52✔
1459
                                                                 tm.et_tm);
1460
                        }
1461
                    }
1462
                }
1463
                dst.emplace_back(li.li_file_range.fr_offset,
209✔
1464
                                 std::chrono::microseconds{0},
×
1465
                                 LEVEL_IGNORE);
209✔
1466
                return scan_match{2000};
209✔
1467
            }
1468

1469
            sf = sf.trim("\" \t");
18,378✔
1470
            if (F_DATE == fd.fd_name || F_DATE_LOCAL == fd.fd_name
35,694✔
1471
                || F_DATE_UTC == fd.fd_name)
35,694✔
1472
            {
1473
                if (this->lf_date_time.scan(
1,070✔
1474
                        sf.data(), sf.length(), nullptr, &date_tm, date_tv))
1,070✔
1475
                {
1476
                    this->lf_timestamp_flags |= date_tm.et_flags;
1,069✔
1477
                    found_date = true;
1,069✔
1478
                }
1479
            } else if (F_TIME == fd.fd_name || F_TIME_LOCAL == fd.fd_name
33,523✔
1480
                       || F_TIME_UTC == fd.fd_name)
33,523✔
1481
            {
1482
                if (this->wlf_time_scanner.scan(
1,101✔
1483
                        sf.data(), sf.length(), nullptr, &time_tm, time_tv))
1,101✔
1484
                {
1485
                    this->lf_timestamp_flags |= time_tm.et_flags;
1,101✔
1486
                    found_time = true;
1,101✔
1487
                }
1488
            } else if (F_STATUS_CODE == fd.fd_name) {
16,207✔
1489
                if (!sf.empty() && sf[0] >= '4') {
1,098✔
1490
                    level = LEVEL_ERROR;
1,018✔
1491
                }
1492
            }
1493

1494
            if (fd.fd_numeric_index) {
18,378✔
1495
                switch (fd.fd_meta.lvm_kind) {
6,402✔
1496
                    case value_kind_t::VALUE_INTEGER:
6,402✔
1497
                    case value_kind_t::VALUE_FLOAT: {
1498
                        auto scan_float_res
1499
                            = scn::scan_value<double>(sf.to_string_view());
6,402✔
1500

1501
                        if (scan_float_res) {
6,402✔
1502
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
6,398✔
1503
                                .add_value(scan_float_res->value());
6,398✔
1504
                        }
1505
                        break;
6,402✔
1506
                    }
1507
                    default:
×
1508
                        break;
×
1509
                }
1510
            }
1511
        }
1512

1513
        if (found_time) {
1,106✔
1514
            auto tm = time_tm;
1,101✔
1515

1516
            if (found_date) {
1,101✔
1517
                tm.et_tm.tm_year = date_tm.et_tm.tm_year;
1,069✔
1518
                tm.et_tm.tm_mday = date_tm.et_tm.tm_mday;
1,069✔
1519
                tm.et_tm.tm_mon = date_tm.et_tm.tm_mon;
1,069✔
1520
                tm.et_tm.tm_wday = date_tm.et_tm.tm_wday;
1,069✔
1521
                tm.et_tm.tm_yday = date_tm.et_tm.tm_yday;
1,069✔
1522
            }
1523

1524
            auto tv = tm.to_timeval();
1,101✔
1525
            if (!this->lf_specialized) {
1,101✔
1526
                for (auto& ll : dst) {
70✔
1527
                    ll.set_ignore(true);
56✔
1528
                }
1529
            }
1530
            dst.emplace_back(li.li_file_range.fr_offset, tv, level);
1,101✔
1531
            return scan_match{2000};
1,101✔
1532
        }
1533

1534
        return scan_no_match{"no header found"};
5✔
1535
    }
1536

1537
    scan_result_t scan(logfile& lf,
11,903✔
1538
                       std::vector<logline>& dst,
1539
                       const line_info& li,
1540
                       shared_buffer_ref& sbr,
1541
                       scan_batch_context& sbc) override
1542
    {
1543
        static const auto* W3C_LOG_NAME = intern_string::lookup("w3c_log");
13,121✔
1544
        static const auto* X_FIELDS_NAME = intern_string::lookup("x_fields");
13,121✔
1545
        static const auto& KNOWN_FIELDS = get_known_fields();
11,903✔
1546
        static const auto& KNOWN_STRUCT_FIELDS = get_known_struct_fields();
11,903✔
1547
        static auto X_FIELDS_IDX = 0;
1548

1549
        if (li.li_partial) {
11,903✔
1550
            return scan_incomplete{};
19✔
1551
        }
1552

1553
        if (dst.empty()) {
11,884✔
1554
            auto file_options = lf.get_file_options();
1,138✔
1555

1556
            if (file_options) {
1,138✔
1557
                this->lf_date_time.dts_default_zone
1558
                    = file_options->second.fo_default_zone.pp_value;
57✔
1559
            } else {
1560
                this->lf_date_time.dts_default_zone = nullptr;
1,081✔
1561
            }
1562
        }
1,138✔
1563

1564
        if (!this->wlf_format_name.empty()) {
11,884✔
1565
            return this->scan_int(dst, li, sbr, sbc);
1,296✔
1566
        }
1567

1568
        if (dst.empty() || dst.size() > 20 || sbr.empty()
20,038✔
1569
            || sbr.get_data()[0] == '#')
20,038✔
1570
        {
1571
            return scan_no_match{"no header found"};
7,855✔
1572
        }
1573

1574
        this->clear();
2,733✔
1575

1576
        for (auto line_iter = dst.begin(); line_iter != dst.end(); ++line_iter)
22,078✔
1577
        {
1578
            auto next_read_result = lf.read_line(line_iter);
19,345✔
1579

1580
            if (next_read_result.isErr()) {
19,345✔
1581
                return scan_no_match{"unable to read first line"};
×
1582
            }
1583

1584
            auto line = next_read_result.unwrap();
19,345✔
1585
            ws_separated_string ss(line.get_data(), line.length());
19,345✔
1586
            auto iter = ss.begin();
19,345✔
1587
            const auto directive = *iter;
19,345✔
1588

1589
            if (directive.empty() || directive[0] != '#') {
19,345✔
1590
                continue;
16,760✔
1591
            }
1592

1593
            ++iter;
2,585✔
1594
            if (iter == ss.end()) {
2,585✔
1595
                continue;
41✔
1596
            }
1597

1598
            if (directive == "#Date:") {
2,544✔
1599
                date_time_scanner dts;
12✔
1600
                struct exttm tm;
12✔
1601
                struct timeval tv;
1602

1603
                if (dts.scan(line.get_data_at(directive.length() + 1),
12✔
1604
                             line.length() - directive.length() - 1,
12✔
1605
                             nullptr,
1606
                             &tm,
1607
                             tv))
1608
                {
1609
                    this->lf_date_time.set_base_time(tv.tv_sec, tm.et_tm);
11✔
1610
                    this->wlf_time_scanner.set_base_time(tv.tv_sec, tm.et_tm);
11✔
1611
                }
1612
            } else if (directive == "#Fields:" && this->wlf_field_defs.empty())
2,532✔
1613
            {
1614
                int numeric_count = 0;
19✔
1615

1616
                do {
1617
                    auto sf = (*iter).trim(")");
210✔
1618

1619
                    auto field_iter = std::find_if(
630✔
1620
                        begin(KNOWN_FIELDS),
1621
                        end(KNOWN_FIELDS),
1622
                        [&sf](auto elem) { return sf == elem.fd_name; });
2,400✔
1623
                    if (field_iter != end(KNOWN_FIELDS)) {
420✔
1624
                        this->wlf_field_defs.emplace_back(*field_iter);
117✔
1625
                        auto& fd = this->wlf_field_defs.back();
117✔
1626
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
117✔
1627
                        if (common_iter == FIELD_META.end()) {
117✔
1628
                            auto emp_res = FIELD_META.emplace(
116✔
1629
                                fd.fd_meta.lvm_name, fd.fd_meta);
116✔
1630
                            common_iter = emp_res.first;
116✔
1631
                        }
1632
                        fd.fd_root_meta = &common_iter->second;
117✔
1633
                    } else if (sf.is_one_of("date", "time")) {
93✔
1634
                        this->wlf_field_defs.emplace_back(
44✔
1635
                            intern_string::lookup(sf));
22✔
1636
                        auto& fd = this->wlf_field_defs.back();
22✔
1637
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
22✔
1638
                        if (common_iter == FIELD_META.end()) {
22✔
1639
                            auto emp_res = FIELD_META.emplace(
21✔
1640
                                fd.fd_meta.lvm_name, fd.fd_meta);
21✔
1641
                            common_iter = emp_res.first;
21✔
1642
                        }
1643
                        fd.fd_root_meta = &common_iter->second;
22✔
1644
                    } else {
1645
                        const auto fs_iter = std::find_if(
213✔
1646
                            begin(KNOWN_STRUCT_FIELDS),
1647
                            end(KNOWN_STRUCT_FIELDS),
1648
                            [&sf](auto elem) {
221✔
1649
                                return sf.startswith(elem.fs_prefix);
221✔
1650
                            });
1651
                        if (fs_iter != end(KNOWN_STRUCT_FIELDS)) {
142✔
1652
                            const intern_string_t field_name
1653
                                = intern_string::lookup(sf.substr(3));
21✔
1654
                            this->wlf_field_defs.emplace_back(
21✔
1655
                                field_name,
1656
                                logline_value_meta(
42✔
1657
                                    field_name,
1658
                                    value_kind_t::VALUE_TEXT,
1659
                                    logline_value_meta::table_column{
×
1660
                                        KNOWN_FIELDS.size() + 1
21✔
1661
                                        + std::distance(
63✔
1662
                                            begin(KNOWN_STRUCT_FIELDS),
1663
                                            fs_iter)},
1664
                                    this)
42✔
1665
                                    .with_struct_name(fs_iter->fs_struct_name));
1666
                        } else {
1667
                            const intern_string_t field_name
1668
                                = intern_string::lookup(sf);
50✔
1669
                            this->wlf_field_defs.emplace_back(
50✔
1670
                                field_name,
1671
                                logline_value_meta(
100✔
1672
                                    field_name,
1673
                                    value_kind_t::VALUE_TEXT,
1674
                                    logline_value_meta::table_column{
×
1675
                                        KNOWN_FIELDS.size() + X_FIELDS_IDX},
100✔
1676
                                    this)
100✔
1677
                                    .with_struct_name(X_FIELDS_NAME));
1678
                        }
1679
                    }
1680
                    auto& fd = this->wlf_field_defs.back();
210✔
1681
                    fd.fd_meta.lvm_format = std::make_optional(this);
210✔
1682
                    switch (fd.fd_meta.lvm_kind) {
210✔
1683
                        case value_kind_t::VALUE_FLOAT:
51✔
1684
                        case value_kind_t::VALUE_INTEGER:
1685
                            fd.with_numeric_index(numeric_count);
51✔
1686
                            numeric_count += 1;
51✔
1687
                            break;
51✔
1688
                        default:
159✔
1689
                            break;
159✔
1690
                    }
1691

1692
                    ++iter;
210✔
1693
                } while (iter != ss.end());
210✔
1694

1695
                this->wlf_format_name = W3C_LOG_NAME;
19✔
1696
            }
1697
        }
36,146✔
1698

1699
        if (!this->wlf_format_name.empty() && !this->wlf_field_defs.empty()) {
2,733✔
1700
            return this->scan_int(dst, li, sbr, sbc);
19✔
1701
        }
1702

1703
        this->wlf_format_name.clear();
2,714✔
1704

1705
        return scan_no_match{"no header found"};
2,714✔
1706
    }
1707

1708
    void annotate(logfile* lf,
1,466✔
1709
                  uint64_t line_number,
1710
                  string_attrs_t& sa,
1711
                  logline_value_vector& values) const override
1712
    {
1713
        auto& sbr = values.lvv_sbr;
1,466✔
1714
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,466✔
1715
        std::optional<line_range> date_lr;
1,466✔
1716
        std::optional<line_range> time_lr;
1,466✔
1717

1718
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
26,162✔
1719
            auto sf = *iter;
24,696✔
1720

1721
            if (iter.index() >= this->wlf_field_defs.size()) {
24,696✔
1722
                sa.emplace_back(line_range{sf.sf_begin, -1},
×
1723
                                SA_INVALID.value("extra fields detected"));
×
1724
                return;
×
1725
            }
1726

1727
            const auto& fd = this->wlf_field_defs[iter.index()];
24,696✔
1728

1729
            if (sf == "-") {
24,696✔
1730
                sf.invalidate();
4,300✔
1731
            }
1732

1733
            auto lr = line_range(sf.sf_begin, sf.sf_end);
24,696✔
1734

1735
            if (lr.is_valid()) {
24,696✔
1736
                if (fd.fd_meta.lvm_name == F_DATE) {
20,396✔
1737
                    date_lr = lr;
1,444✔
1738
                } else if (fd.fd_meta.lvm_name == F_TIME) {
18,952✔
1739
                    time_lr = lr;
1,458✔
1740
                }
1741
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
20,396✔
1742
                if (sf.startswith("\"")) {
20,396✔
1743
                    auto& meta = values.lvv_values.back().lv_meta;
28✔
1744

1745
                    if (meta.lvm_kind == value_kind_t::VALUE_TEXT) {
28✔
1746
                        meta.lvm_kind = value_kind_t::VALUE_W3C_QUOTED;
26✔
1747
                    } else {
1748
                        meta.lvm_kind = value_kind_t::VALUE_NULL;
2✔
1749
                    }
1750
                }
1751
            } else {
1752
                values.lvv_values.emplace_back(fd.fd_meta);
4,300✔
1753
            }
1754
            if (fd.fd_root_meta != nullptr) {
24,696✔
1755
                values.lvv_values.back().lv_meta.lvm_user_hidden
20,318✔
1756
                    = fd.fd_root_meta->lvm_user_hidden;
20,318✔
1757
            }
1758
        }
1759
        if (time_lr) {
1,466✔
1760
            auto ts_lr = time_lr.value();
1,458✔
1761
            if (date_lr) {
1,458✔
1762
                if (date_lr->lr_end + 1 == time_lr->lr_start) {
1,443✔
1763
                    ts_lr.lr_start = date_lr->lr_start;
1,442✔
1764
                    ts_lr.lr_end = time_lr->lr_end;
1,442✔
1765
                }
1766
            }
1767

1768
            sa.emplace_back(ts_lr, L_TIMESTAMP.value());
1,458✔
1769
        }
1770
        log_format::annotate(lf, line_number, sa, values);
1,466✔
1771
    }
1772

NEW
1773
    std::optional<size_t> stats_index_for_value(
×
1774
        const intern_string_t& name) const override
1775
    {
1776
        for (const auto& wlf_field_def : this->wlf_field_defs) {
×
UNCOV
1777
            if (wlf_field_def.fd_meta.lvm_name == name) {
×
1778
                if (!wlf_field_def.fd_numeric_index) {
×
1779
                    break;
×
1780
                }
NEW
1781
                return wlf_field_def.fd_numeric_index.value();
×
1782
            }
1783
        }
1784

NEW
1785
        return std::nullopt;
×
1786
    }
1787

1788
    bool hide_field(const intern_string_t field_name, bool val) override
×
1789
    {
1790
        if (field_name == LOG_TIME_STR) {
×
1791
            auto date_iter = FIELD_META.find(F_DATE);
×
1792
            auto time_iter = FIELD_META.find(F_TIME);
×
1793
            if (date_iter == FIELD_META.end() || time_iter == FIELD_META.end())
×
1794
            {
1795
                return false;
×
1796
            }
1797
            date_iter->second.lvm_user_hidden = val;
×
1798
            time_iter->second.lvm_user_hidden = val;
×
1799
            return true;
×
1800
        }
1801

1802
        auto fd_iter = FIELD_META.find(field_name);
×
1803
        if (fd_iter == FIELD_META.end()) {
×
1804
            return false;
×
1805
        }
1806

1807
        fd_iter->second.lvm_user_hidden = val;
×
1808

1809
        return true;
×
1810
    }
1811

1812
    std::map<intern_string_t, logline_value_meta> get_field_states() override
52✔
1813
    {
1814
        std::map<intern_string_t, logline_value_meta> retval;
52✔
1815

1816
        for (const auto& fd : FIELD_META) {
108✔
1817
            retval.emplace(fd.first, fd.second);
56✔
1818
        }
1819

1820
        return retval;
52✔
1821
    }
×
1822

1823
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
14✔
1824
    {
1825
        auto retval = std::make_shared<w3c_log_format>(*this);
14✔
1826

1827
        retval->lf_specialized = true;
14✔
1828
        return retval;
28✔
1829
    }
14✔
1830

1831
    class w3c_log_table : public log_format_vtab_impl {
1832
    public:
1833
        explicit w3c_log_table(const w3c_log_format& format)
11✔
1834
            : log_format_vtab_impl(format), wlt_format(format)
11✔
1835
        {
1836
        }
11✔
1837

1838
        void get_columns(std::vector<vtab_column>& cols) const override
14✔
1839
        {
1840
            for (const auto& fd : get_known_fields()) {
238✔
1841
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
224✔
1842
                    fd.fd_meta.lvm_kind);
224✔
1843

1844
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
224✔
1845
                                  type_pair.first,
1846
                                  fd.fd_collator,
224✔
1847
                                  false,
448✔
1848
                                  "",
1849
                                  type_pair.second);
1850
            }
1851
            cols.emplace_back("x_fields");
14✔
1852
            cols.back().with_comment(
28✔
1853
                "A JSON-object that contains fields that are not first-class "
1854
                "columns");
1855
            for (const auto& fs : get_known_struct_fields()) {
70✔
1856
                cols.emplace_back(fs.fs_struct_name.to_string());
56✔
1857
            }
1858
        };
14✔
1859

1860
        void get_foreign_keys(
3✔
1861
            std::unordered_set<std::string>& keys_inout) const override
1862
        {
1863
            this->log_vtab_impl::get_foreign_keys(keys_inout);
3✔
1864

1865
            for (const auto& fd : get_known_fields()) {
51✔
1866
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
48✔
1867
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
30✔
1868
                }
1869
            }
1870
        }
3✔
1871

1872
        const w3c_log_format& wlt_format;
1873
    };
1874

1875
    /**
     * @return The map of format name to virtual table, held in a
     *         function-local static.
     */
    static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>&
    get_tables()
    {
        static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>
            tables_by_name;

        return tables_by_name;
    }
1882

1883
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
637✔
1884
    {
1885
        if (this->wlf_format_name.empty()) {
637✔
1886
            return nullptr;
626✔
1887
        }
1888

1889
        std::shared_ptr<w3c_log_table> retval = nullptr;
11✔
1890

1891
        auto& tables = get_tables();
11✔
1892
        const auto iter = tables.find(this->wlf_format_name);
11✔
1893
        if (iter == tables.end()) {
11✔
1894
            retval = std::make_shared<w3c_log_table>(*this);
11✔
1895
            tables[this->wlf_format_name] = retval;
11✔
1896
        }
1897

1898
        return retval;
11✔
1899
    }
11✔
1900

1901
    void get_subline(const log_format_file_state& lffs,
1,629✔
1902
                     const logline& ll,
1903
                     shared_buffer_ref& sbr,
1904
                     subline_options opts) override
1905
    {
1906
    }
1,629✔
1907

1908
    date_time_scanner wlf_time_scanner;
1909
    intern_string_t wlf_format_name;
1910
    std::vector<field_def> wlf_field_defs;
1911
};
1912

1913
std::unordered_map<const intern_string_t, logline_value_meta>
1914
    w3c_log_format::FIELD_META;
1915

1916
const intern_string_t w3c_log_format::F_DATE = intern_string::lookup("date");
1917
const intern_string_t w3c_log_format::F_TIME = intern_string::lookup("time");
1918

1919
struct logfmt_pair_handler {
1920
    explicit logfmt_pair_handler(date_time_scanner& dts) : lph_dt_scanner(dts)
11,883✔
1921
    {
1922
    }
11,883✔
1923

1924
    log_format::scan_result_t process_value(const string_fragment& value_frag)
4,036✔
1925
    {
1926
        if (this->lph_key_frag.is_one_of(
4,036✔
1927
                "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
1928
        {
1929
            if (!this->lph_dt_scanner.scan(value_frag.data(),
31✔
1930
                                           value_frag.length(),
31✔
1931
                                           nullptr,
1932
                                           &this->lph_time_tm,
1933
                                           this->lph_tv))
31✔
1934
            {
1935
                return log_format::scan_no_match{
×
1936
                    "timestamp value did not parse correctly"};
×
1937
            }
1938
            char buf[1024];
1939
            this->lph_dt_scanner.ftime(
31✔
1940
                buf, sizeof(buf), nullptr, this->lph_time_tm);
31✔
1941
            this->lph_found_time = true;
31✔
1942
        } else if (this->lph_key_frag.is_one_of("level"_frag, "lvl"_frag)) {
4,005✔
1943
            this->lph_level
1944
                = string2level(value_frag.data(), value_frag.length());
40✔
1945
        }
1946
        return log_format::scan_match{};
4,036✔
1947
    }
1948

1949
    date_time_scanner& lph_dt_scanner;
1950
    bool lph_found_time{false};
1951
    exttm lph_time_tm;
1952
    timeval lph_tv{0, 0};
1953
    log_level_t lph_level{log_level_t::LEVEL_INFO};
1954
    string_fragment lph_key_frag{""};
1955
};
1956

1957
class logfmt_format : public log_format {
1958
public:
1959
    const intern_string_t get_name() const override
14,566✔
1960
    {
1961
        const static intern_string_t NAME = intern_string::lookup("logfmt_log");
16,046✔
1962

1963
        return NAME;
14,566✔
1964
    }
1965

1966
    class logfmt_log_table : public log_format_vtab_impl {
1967
    public:
1968
        logfmt_log_table(const log_format& format)
626✔
1969
            : log_format_vtab_impl(format)
626✔
1970
        {
1971
        }
626✔
1972

1973
        void get_columns(std::vector<vtab_column>& cols) const override
627✔
1974
        {
1975
            static const auto FIELDS = std::string("fields");
1,879✔
1976

1977
            cols.emplace_back(FIELDS);
627✔
1978
        }
627✔
1979
    };
1980

1981
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
626✔
1982
    {
1983
        static auto retval = std::make_shared<logfmt_log_table>(*this);
626✔
1984

1985
        return retval;
626✔
1986
    }
1987

1988
    scan_result_t scan(logfile& lf,
11,883✔
1989
                       std::vector<logline>& dst,
1990
                       const line_info& li,
1991
                       shared_buffer_ref& sbr,
1992
                       scan_batch_context& sbc) override
1993
    {
1994
        auto p = logfmt::parser(sbr.to_string_fragment());
11,883✔
1995
        scan_result_t retval = scan_no_match{};
11,883✔
1996
        bool done = false;
11,883✔
1997
        logfmt_pair_handler lph(this->lf_date_time);
11,883✔
1998

1999
        if (dst.empty()) {
11,883✔
2000
            auto file_options = lf.get_file_options();
1,150✔
2001

2002
            if (file_options) {
1,150✔
2003
                this->lf_date_time.dts_default_zone
2004
                    = file_options->second.fo_default_zone.pp_value;
57✔
2005
            } else {
2006
                this->lf_date_time.dts_default_zone = nullptr;
1,093✔
2007
            }
2008
        }
1,150✔
2009

2010
        while (!done) {
27,802✔
2011
            auto parse_result = p.step();
15,919✔
2012

2013
            auto value_res = parse_result.match(
2014
                [&done](const logfmt::parser::end_of_input&) -> scan_result_t {
×
2015
                    done = true;
234✔
2016
                    return scan_match{};
234✔
2017
                },
2018
                [&lph](const logfmt::parser::kvpair& kvp) -> scan_result_t {
×
2019
                    lph.lph_key_frag = kvp.first;
4,036✔
2020

2021
                    return kvp.second.match(
2022
                        [](const logfmt::parser::bool_value& bv)
×
2023
                            -> scan_result_t { return scan_match{}; },
×
2024
                        [&lph](const logfmt::parser::float_value& fv)
×
2025
                            -> scan_result_t {
2026
                            return lph.process_value(fv.fv_str_value);
5✔
2027
                        },
2028
                        [&lph](const logfmt::parser::int_value& iv)
×
2029
                            -> scan_result_t {
2030
                            return lph.process_value(iv.iv_str_value);
108✔
2031
                        },
2032
                        [&lph](const logfmt::parser::quoted_value& qv)
×
2033
                            -> scan_result_t {
2034
                            auto_mem<yajl_handle_t> handle(yajl_free);
343✔
2035
                            yajl_callbacks cb;
2036
                            scan_result_t retval;
343✔
2037

2038
                            memset(&cb, 0, sizeof(cb));
343✔
2039
                            handle = yajl_alloc(&cb, nullptr, &lph);
343✔
2040
                            cb.yajl_string = +[](void* ctx,
686✔
2041
                                                 const unsigned char* str,
2042
                                                 size_t len,
2043
                                                 yajl_string_props_t*) -> int {
2044
                                auto& lph = *((logfmt_pair_handler*) ctx);
343✔
2045
                                string_fragment value_frag{str, 0, (int) len};
343✔
2046

2047
                                auto value_res = lph.process_value(value_frag);
343✔
2048
                                return value_res.is<scan_match>();
686✔
2049
                            };
686✔
2050

2051
                            if (yajl_parse(
343✔
2052
                                    handle,
2053
                                    (const unsigned char*) qv.qv_value.data(),
343✔
2054
                                    qv.qv_value.length())
343✔
2055
                                    != yajl_status_ok
2056
                                || yajl_complete_parse(handle)
343✔
2057
                                    != yajl_status_ok)
2058
                            {
2059
                                log_debug("json parsing failed");
×
2060
                                string_fragment unq_frag{
2061
                                    qv.qv_value.sf_string,
×
2062
                                    qv.qv_value.sf_begin + 1,
×
2063
                                    qv.qv_value.sf_end - 1,
×
2064
                                };
2065

2066
                                return lph.process_value(unq_frag);
×
2067
                            }
2068

2069
                            return scan_match{};
343✔
2070
                        },
343✔
2071
                        [&lph](const logfmt::parser::unquoted_value& uv)
4,036✔
2072
                            -> scan_result_t {
2073
                            return lph.process_value(uv.uv_value);
3,580✔
2074
                        });
8,072✔
2075
                },
2076
                [](const logfmt::parser::error& err) -> scan_result_t {
×
2077
                    // log_error("logfmt parse error: %s", err.e_msg.c_str());
2078
                    return scan_no_match{};
11,649✔
2079
                });
15,919✔
2080
            if (value_res.is<scan_no_match>()) {
15,919✔
2081
                retval = value_res;
11,649✔
2082
                done = true;
11,649✔
2083
            }
2084
        }
15,919✔
2085

2086
        if (lph.lph_found_time) {
11,883✔
2087
            this->lf_timestamp_flags = lph.lph_time_tm.et_flags;
31✔
2088
            dst.emplace_back(
31✔
2089
                li.li_file_range.fr_offset, lph.lph_tv, lph.lph_level);
31✔
2090
            retval = scan_match{2000};
31✔
2091
        }
2092

2093
        return retval;
23,766✔
2094
    }
×
2095

2096
    void annotate(logfile* lf,
11✔
2097
                  uint64_t line_number,
2098
                  string_attrs_t& sa,
2099
                  logline_value_vector& values) const override
2100
    {
2101
        static const intern_string_t FIELDS_NAME
2102
            = intern_string::lookup("fields");
15✔
2103

2104
        auto& sbr = values.lvv_sbr;
11✔
2105
        auto p = logfmt::parser(sbr.to_string_fragment());
11✔
2106
        auto done = false;
11✔
2107
        auto found_body = false;
11✔
2108

2109
        while (!done) {
95✔
2110
            auto parse_result = p.step();
84✔
2111

2112
            done = parse_result.match(
168✔
2113
                [](const logfmt::parser::end_of_input&) { return true; },
11✔
2114
                [this, &sa, &values, &found_body](
×
2115
                    const logfmt::parser::kvpair& kvp) {
2116
                    auto value_frag = kvp.second.match(
73✔
2117
                        [this, &kvp, &values](
×
2118
                            const logfmt::parser::bool_value& bv) {
2119
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2120
                                                              kvp.first),
×
2121
                                                          value_kind_t::
2122
                                                              VALUE_INTEGER,
2123
                                                          logline_value_meta::
2124
                                                              table_column{0},
×
2125
                                                          (log_format*) this}
×
2126
                                           .with_struct_name(FIELDS_NAME);
×
2127
                            values.lvv_values.emplace_back(lvm, bv.bv_value);
×
2128

2129
                            return bv.bv_str_value;
×
2130
                        },
×
2131
                        [this, &kvp, &values](
×
2132
                            const logfmt::parser::int_value& iv) {
2133
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2134
                                                              kvp.first),
×
2135
                                                          value_kind_t::
2136
                                                              VALUE_INTEGER,
2137
                                                          logline_value_meta::
2138
                                                              table_column{0},
×
2139
                                                          (log_format*) this}
×
2140
                                           .with_struct_name(FIELDS_NAME);
×
2141
                            values.lvv_values.emplace_back(lvm, iv.iv_value);
×
2142

2143
                            return iv.iv_str_value;
×
2144
                        },
×
2145
                        [this, &kvp, &values](
73✔
2146
                            const logfmt::parser::float_value& fv) {
2147
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2148
                                                              kvp.first),
×
2149
                                                          value_kind_t::
2150
                                                              VALUE_INTEGER,
2151
                                                          logline_value_meta::
2152
                                                              table_column{0},
×
2153
                                                          (log_format*) this}
×
2154
                                           .with_struct_name(FIELDS_NAME);
×
2155
                            values.lvv_values.emplace_back(lvm, fv.fv_value);
×
2156

2157
                            return fv.fv_str_value;
×
2158
                        },
×
2159
                        [](const logfmt::parser::quoted_value& qv) {
×
2160
                            return qv.qv_value;
24✔
2161
                        },
2162
                        [](const logfmt::parser::unquoted_value& uv) {
×
2163
                            return uv.uv_value;
49✔
2164
                        });
2165
                    auto value_lr
2166
                        = line_range{value_frag.sf_begin, value_frag.sf_end};
73✔
2167

2168
                    auto known_field = false;
73✔
2169
                    if (kvp.first.is_one_of(
73✔
2170
                            "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
2171
                    {
2172
                        sa.emplace_back(value_lr, L_TIMESTAMP.value());
11✔
2173
                        known_field = true;
11✔
2174
                    } else if (kvp.first.is_one_of("level"_frag, "lvl"_frag)) {
62✔
2175
                        sa.emplace_back(value_lr, L_LEVEL.value());
11✔
2176
                        known_field = true;
11✔
2177
                    } else if (kvp.first.is_one_of("msg"_frag, "message"_frag))
51✔
2178
                    {
2179
                        sa.emplace_back(value_lr, SA_BODY.value());
11✔
2180
                        found_body = true;
11✔
2181
                    } else if (kvp.second.is<logfmt::parser::quoted_value>()
40✔
2182
                               || kvp.second
78✔
2183
                                      .is<logfmt::parser::unquoted_value>())
38✔
2184
                    {
2185
                        auto lvm
2186
                            = logline_value_meta{intern_string::lookup(
160✔
2187
                                                     kvp.first),
40✔
2188
                                                 value_frag.startswith("\"")
40✔
2189
                                                     ? value_kind_t::VALUE_JSON
2190
                                                     : value_kind_t::VALUE_TEXT,
2191
                                                 logline_value_meta::
2192
                                                     table_column{0},
40✔
2193
                                                 (log_format*) this}
80✔
2194
                                  .with_struct_name(FIELDS_NAME);
40✔
2195
                        values.lvv_values.emplace_back(lvm, value_frag);
40✔
2196
                    }
40✔
2197
                    if (known_field) {
73✔
2198
                        auto key_with_eq = kvp.first;
22✔
2199
                        key_with_eq.sf_end += 1;
22✔
2200
                        sa.emplace_back(to_line_range(key_with_eq),
22✔
2201
                                        SA_REPLACED.value());
44✔
2202
                    } else {
2203
                        sa.emplace_back(to_line_range(kvp.first),
51✔
2204
                                        VC_ROLE.value(role_t::VCR_OBJECT_KEY));
102✔
2205
                    }
2206
                    return false;
73✔
2207
                },
2208
                [line_number, &sbr](const logfmt::parser::error& err) {
84✔
2209
                    log_error(
×
2210
                        "bad line %.*s", (int) sbr.length(), sbr.get_data());
2211
                    log_error("%lld:logfmt parse error: %s",
×
2212
                              line_number,
2213
                              err.e_msg.c_str());
2214
                    return true;
×
2215
                });
2216
        }
84✔
2217

2218
        if (!found_body) {
11✔
2219
            sa.emplace_back(line_range::empty_at(sbr.length()),
×
2220
                            SA_BODY.value());
×
2221
        }
2222

2223
        log_format::annotate(lf, line_number, sa, values);
11✔
2224
    }
11✔
2225

2226
    std::shared_ptr<log_format> specialized(int fmt_lock) override
5✔
2227
    {
2228
        auto retval = std::make_shared<logfmt_format>(*this);
5✔
2229

2230
        retval->lf_specialized = true;
5✔
2231
        return retval;
10✔
2232
    }
5✔
2233
};
2234

2235
static auto format_binder = injector::bind_multiple<log_format>()
2236
                                .add<logfmt_format>()
2237
                                .add<bro_log_format>()
2238
                                .add<w3c_log_format>()
2239
                                .add<generic_log_format>()
2240
                                .add<piper_log_format>();
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc