• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 19612931027-2708

23 Nov 2025 02:52PM UTC coverage: 68.84% (-0.04%) from 68.876%
19612931027-2708

push

github

tstack
[tests] fix piper test

51153 of 74307 relevant lines covered (68.84%)

432164.33 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.16
/src/log_format_impls.cc
1
/**
2
 * Copyright (c) 2007-2017, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file log_format_impls.cc
30
 */
31

32
#include <algorithm>
33
#include <chrono>
34
#include <memory>
35
#include <utility>
36

37
#include "log_format.hh"
38

39
#include <stdio.h>
40

41
#include "base/injector.bind.hh"
42
#include "base/opt_util.hh"
43
#include "base/string_attr_type.hh"
44
#include "config.h"
45
#include "formats/logfmt/logfmt.parser.hh"
46
#include "log_vtab_impl.hh"
47
#include "ptimec.hh"
48
#include "scn/scan.h"
49
#include "sql_util.hh"
50
#include "yajlpp/yajlpp.hh"
51

52
class piper_log_format : public log_format {
53
public:
54
    const intern_string_t get_name() const override
15,287✔
55
    {
56
        static const intern_string_t RETVAL
57
            = intern_string::lookup("lnav_piper_log");
16,779✔
58

59
        return RETVAL;
15,287✔
60
    }
61

62
    scan_result_t scan(logfile& lf,
12,530✔
63
                       std::vector<logline>& dst,
64
                       const line_info& li,
65
                       shared_buffer_ref& sbr,
66
                       scan_batch_context& sbc) override
67
    {
68
        if (lf.has_line_metadata()
12,530✔
69
            && lf.get_text_format() == text_format_t::TF_LOG)
12,530✔
70
        {
71
            dst.emplace_back(
293✔
72
                li.li_file_range.fr_offset, li.li_timestamp, li.li_level);
293✔
73
            return scan_match{1};
293✔
74
        }
75

76
        return scan_no_match{"not a piper capture"};
12,237✔
77
    }
78

79
    static constexpr int TIMESTAMP_SIZE = 28;
80

81
    void annotate(logfile* lf,
41✔
82
                  uint64_t line_number,
83
                  string_attrs_t& sa,
84
                  logline_value_vector& values) const override
85
    {
86
        auto lr = line_range{0, TIMESTAMP_SIZE};
41✔
87
        sa.emplace_back(lr, L_TIMESTAMP.value());
41✔
88
        log_format::annotate(lf, line_number, sa, values);
41✔
89
    }
41✔
90

91
    void get_subline(const log_format_file_state& lffs,
317✔
92
                     const logline& ll,
93
                     shared_buffer_ref& sbr,
94
                     subline_options opts) override
95
    {
96
        this->plf_cached_line.resize(TIMESTAMP_SIZE);
317✔
97
        auto tlen = sql_strftime(this->plf_cached_line.data(),
317✔
98
                                 this->plf_cached_line.size(),
99
                                 ll.get_timeval(),
317✔
100
                                 'T');
101
        this->plf_cached_line.resize(tlen);
317✔
102
        {
103
            char zone_str[16];
104
            exttm tmptm;
317✔
105

106
            tmptm.et_flags |= ETF_ZONE_SET;
317✔
107
            tmptm.et_gmtoff
108
                = lnav::local_time_to_info(
634✔
109
                      date::local_seconds{ll.get_time<std::chrono::seconds>()})
317✔
110
                      .first.offset.count();
317✔
111
            off_t zone_len = 0;
317✔
112
            ftime_z(zone_str, zone_len, sizeof(zone_str), tmptm);
317✔
113
            for (off_t lpc = 0; lpc < zone_len; lpc++) {
1,902✔
114
                this->plf_cached_line.push_back(zone_str[lpc]);
1,585✔
115
            }
116
        }
117
        this->plf_cached_line.push_back(' ');
317✔
118
        const auto prefix_len = this->plf_cached_line.size();
317✔
119
        this->plf_cached_line.resize(this->plf_cached_line.size()
634✔
120
                                     + sbr.length());
317✔
121
        memcpy(
317✔
122
            &this->plf_cached_line[prefix_len], sbr.get_data(), sbr.length());
317✔
123

124
        sbr.share(this->plf_share_manager,
634✔
125
                  this->plf_cached_line.data(),
317✔
126
                  this->plf_cached_line.size());
127
    }
317✔
128

129
    std::shared_ptr<log_format> specialized(int fmt_lock) override
6✔
130
    {
131
        auto retval = std::make_shared<piper_log_format>(*this);
6✔
132

133
        retval->lf_specialized = true;
6✔
134
        retval->lf_timestamp_flags |= ETF_ZONE_SET | ETF_MICROS_SET;
6✔
135
        return retval;
12✔
136
    }
6✔
137

138
private:
139
    shared_buffer plf_share_manager;
140
    std::vector<char> plf_cached_line;
141
};
142

143
class generic_log_format : public log_format {
144
public:
145
    static const pcre_format* get_pcre_log_formats()
12,579✔
146
    {
147
        static const pcre_format log_fmt[] = {
148
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>@[0-9a-zA-Z]{16,24}))"),
149
            pcre_format(
150
                R"(^(?:\*\*\*\s+)?(?<timestamp>(?:\s|\d{4}[\-\/]\d{2}[\-\/]\d{2}|T|\d{1,2}:\d{2}(?::\d{2}(?:[\.,]\d{1,6})?)?|Z|[+\-]\d{2}:?\d{2}|(?!DBG|DEBUG|ERR|INFO|WARN|NONE)[A-Z]{3,4})+)[:|\s]?(trc|trace|dbg|debug|info|warn(?:ing)?|err(?:or)?)[:|\s]\s*)"),
151
            pcre_format(
152
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) \[(trace|debug|info|warn(?:ing)?|error|critical)\]\s+)"),
153
            pcre_format(
154
                R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,+/-]+) -- (trace|debug|info|warn(?:ing)?|error|critical) --\s+)"),
155

156
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+/\.-]+) \[\w\s+)"),
157
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+)\s+)"),
158
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+,/\.-]+) -\s+)"),
159
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+) -\s+)"),
160
            pcre_format(
161
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\] \[(trace|debug|info|warn(?:ing)?|error|critical)\]\s+)"),
162
            pcre_format("^(?:\\*\\*\\*\\s+)?(?<timestamp>[\\w: "
163
                        "\\.,/-]+)\\[[^\\]]+\\]\\s+"),
164
            pcre_format(R"(^(?:\*\*\*\s+)?(?<timestamp>[\w:+ \.,/-]+)\s+)"),
165

166
            pcre_format(
167
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s*(\w+):?\s+)"),
168
            pcre_format(
169
                R"(^(?:\*\*\*\s+)?\[(?<timestamp>[\w:+ \.,+/-]+)\]\s+)"),
170
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: "
171
                        "\\.,+/-]+)\\] \\w+\\s+"),
172
            pcre_format("^(?:\\*\\*\\*\\s+)?\\[(?<timestamp>[\\w: ,+/-]+)\\] "
173
                        "\\(\\d+\\)\\s+"),
174

175
            pcre_format(),
176
        };
12,579✔
177

178
        return log_fmt;
12,579✔
179
    }
180

181
    std::string get_pattern_regex(const pattern_locks& pl,
×
182
                                  uint64_t line_number) const override
183
    {
184
        auto pat_index = pl.pattern_index_for_line(line_number);
×
185
        return get_pcre_log_formats()[pat_index].name;
×
186
    }
187

188
    const intern_string_t get_name() const override
14,906✔
189
    {
190
        static const intern_string_t RETVAL
191
            = intern_string::lookup("generic_log");
16,398✔
192

193
        return RETVAL;
14,906✔
194
    }
195

196
    scan_result_t scan(logfile& lf,
12,493✔
197
                       std::vector<logline>& dst,
198
                       const line_info& li,
199
                       shared_buffer_ref& sbr,
200
                       scan_batch_context& sbc) override
201
    {
202
        exttm log_time;
12,493✔
203
        timeval log_tv;
204
        string_fragment ts;
12,493✔
205
        std::optional<string_fragment> level;
12,493✔
206
        const char* last_pos;
207

208
        if (dst.empty()) {
12,493✔
209
            auto file_options = lf.get_file_options();
201✔
210

211
            if (file_options) {
201✔
212
                this->lf_date_time.dts_default_zone
213
                    = file_options->second.fo_default_zone.pp_value;
2✔
214
            } else {
215
                this->lf_date_time.dts_default_zone = nullptr;
199✔
216
            }
217
        }
201✔
218

219
        if ((last_pos = this->log_scanf(sbc,
24,986✔
220
                                        dst.size(),
12,493✔
221
                                        sbr.to_string_fragment(),
222
                                        get_pcre_log_formats(),
223
                                        nullptr,
224
                                        &log_time,
225
                                        &log_tv,
226

227
                                        &ts,
228
                                        &level))
229
            != nullptr)
12,493✔
230
        {
231
            auto level_val = log_level_t::LEVEL_UNKNOWN;
2,359✔
232
            if (level) {
2,359✔
233
                level_val = string2level(level->data(), level->length());
2,359✔
234
            }
235

236
            if (!((log_time.et_flags & ETF_DAY_SET)
2,359✔
237
                  && (log_time.et_flags & ETF_MONTH_SET)
2,284✔
238
                  && (log_time.et_flags & ETF_YEAR_SET)))
2,284✔
239
            {
240
                this->check_for_new_year(dst, log_time, log_tv);
680✔
241
            }
242

243
            if (!(this->lf_timestamp_flags
4,718✔
244
                  & (ETF_MILLIS_SET | ETF_MICROS_SET | ETF_NANOS_SET))
2,359✔
245
                && !dst.empty()
2,034✔
246
                && dst.back().get_time<std::chrono::seconds>().count()
2,032✔
247
                    == log_tv.tv_sec
2,032✔
248
                && dst.back()
5,249✔
249
                        .get_subsecond_time<std::chrono::microseconds>()
3,215✔
250
                        .count()
856✔
251
                    != 0)
252
            {
253
                auto log_ms
254
                    = dst.back()
×
255
                          .get_subsecond_time<std::chrono::microseconds>();
×
256

257
                log_time.et_nsec
258
                    = std::chrono::duration_cast<std::chrono::nanoseconds>(
×
259
                          log_ms)
260
                          .count();
×
261
                log_tv.tv_usec
262
                    = std::chrono::duration_cast<std::chrono::microseconds>(
×
263
                          log_ms)
264
                          .count();
×
265
            }
266

267
            auto log_us = to_us(log_tv);
2,359✔
268
            auto tid_iter = sbc.sbc_tids.insert_tid(
2,359✔
269
                sbc.sbc_allocator, string_fragment{}, log_us);
×
270
            tid_iter->second.titr_level_stats.update_msg_count(level_val);
2,359✔
271
            dst.emplace_back(li.li_file_range.fr_offset, log_us, level_val);
2,359✔
272
            return scan_match{5};
2,359✔
273
        }
274

275
        return scan_no_match{"no patterns matched"};
10,134✔
276
    }
277

278
    void annotate(logfile* lf,
86✔
279
                  uint64_t line_number,
280
                  string_attrs_t& sa,
281
                  logline_value_vector& values) const override
282
    {
283
        thread_local auto md = lnav::pcre2pp::match_data::unitialized();
86✔
284
        auto lffs = lf->get_format_file_state();
86✔
285
        auto& line = values.lvv_sbr;
86✔
286
        int pat_index
287
            = lffs.lffs_pattern_locks.pattern_index_for_line(line_number);
86✔
288
        const auto& fmt = get_pcre_log_formats()[pat_index];
86✔
289
        int prefix_len = 0;
86✔
290
        const auto line_sf = line.to_string_fragment();
86✔
291
        auto match_res = fmt.pcre->capture_from(line_sf)
86✔
292
                             .into(md)
86✔
293
                             .matches(PCRE2_NO_UTF_CHECK)
172✔
294
                             .ignore_error();
86✔
295
        if (!match_res) {
86✔
296
            return;
7✔
297
        }
298

299
        auto ts_cap = md[fmt.pf_timestamp_index].value();
79✔
300
        auto lr = to_line_range(ts_cap.trim());
79✔
301
        sa.emplace_back(lr, L_TIMESTAMP.value());
79✔
302

303
        values.lvv_values.emplace_back(TS_META, line, lr);
79✔
304
        values.lvv_values.back().lv_meta.lvm_format = (log_format*) this;
79✔
305

306
        prefix_len = md[0]->sf_end;
79✔
307
        auto level_cap = md[2];
79✔
308
        if (level_cap) {
79✔
309
            if (string2level(level_cap->data(), level_cap->length(), true)
73✔
310
                != LEVEL_UNKNOWN)
73✔
311
            {
312
                values.lvv_values.emplace_back(
73✔
313
                    LEVEL_META, line, to_line_range(level_cap->trim()));
73✔
314
                values.lvv_values.back().lv_meta.lvm_format
73✔
315
                    = (log_format*) this;
73✔
316

317
                lr = to_line_range(level_cap->trim());
73✔
318
                if (lr.lr_end != (ssize_t) line.length()) {
73✔
319
                    sa.emplace_back(lr, L_LEVEL.value());
73✔
320
                }
321
            }
322
        }
323

324
        lr.lr_start = 0;
79✔
325
        lr.lr_end = prefix_len;
79✔
326
        sa.emplace_back(lr, L_PREFIX.value());
79✔
327

328
        lr.lr_start = prefix_len;
79✔
329
        lr.lr_end = line.length();
79✔
330
        sa.emplace_back(lr, SA_BODY.value());
79✔
331

332
        log_format::annotate(lf, line_number, sa, values);
79✔
333
    }
334

335
    std::shared_ptr<log_format> specialized(int fmt_lock) override
50✔
336
    {
337
        auto retval = std::make_shared<generic_log_format>(*this);
50✔
338

339
        retval->lf_specialized = true;
50✔
340
        return retval;
100✔
341
    }
50✔
342

343
    bool hide_field(const intern_string_t field_name, bool val) override
2✔
344
    {
345
        if (field_name == TS_META.lvm_name) {
2✔
346
            TS_META.lvm_user_hidden = val;
1✔
347
            return true;
1✔
348
        }
349
        if (field_name == LEVEL_META.lvm_name) {
1✔
350
            LEVEL_META.lvm_user_hidden = val;
1✔
351
            return true;
1✔
352
        }
353
        if (field_name == OPID_META.lvm_name) {
×
354
            OPID_META.lvm_user_hidden = val;
×
355
            return true;
×
356
        }
357
        return false;
×
358
    }
359

360
    std::map<intern_string_t, logline_value_meta> get_field_states() override
52✔
361
    {
362
        return {
363
            {TS_META.lvm_name, TS_META},
364
            {LEVEL_META.lvm_name, LEVEL_META},
365
            {OPID_META.lvm_name, OPID_META},
366
        };
260✔
367
    }
52✔
368

369
private:
370
    static logline_value_meta TS_META;
371
    static logline_value_meta LEVEL_META;
372
    static logline_value_meta OPID_META;
373
};
374

375
logline_value_meta generic_log_format::TS_META{
376
    intern_string::lookup("log_time"),
377
    value_kind_t::VALUE_TEXT,
378
    logline_value_meta::table_column{2},
379
};
380

381
logline_value_meta generic_log_format::LEVEL_META{
382
    intern_string::lookup("log_level"),
383
    value_kind_t::VALUE_TEXT,
384
    logline_value_meta::table_column{3},
385
};
386

387
logline_value_meta generic_log_format::OPID_META{
388
    intern_string::lookup("log_opid"),
389
    value_kind_t::VALUE_TEXT,
390
    logline_value_meta::internal_column{},
391
};
392

393
/**
 * Decode `\xHH` escapes in a length-delimited string.
 *
 * Each backslash followed by `x` and exactly two hexadecimal digits is
 * replaced by the byte with that value.  All other input, including a
 * backslash that does not start a complete escape, is copied through
 * unchanged.
 *
 * The digits are decoded by hand instead of with sscanf() because the input
 * is not required to be NUL-terminated and sscanf("%2x") would also accept
 * leading whitespace, a sign, or a single digit.
 *
 * @param str The bytes to decode; only the first `len` bytes are read.
 * @param len The number of bytes in `str`.
 * @return The decoded string.
 */
std::string
from_escaped_string(const char* str, size_t len)
{
    // Numeric value of a hexadecimal digit, or -1 for any other character.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    std::string retval;

    retval.reserve(len);
    for (size_t lpc = 0; lpc < len; lpc++) {
        // A full escape needs four bytes: '\\', 'x' and two digits.
        if (str[lpc] == '\\' && (lpc + 3) < len && str[lpc + 1] == 'x') {
            const int hi = hex_value(str[lpc + 2]);
            const int lo = hex_value(str[lpc + 3]);

            if (hi >= 0 && lo >= 0) {
                retval.push_back(static_cast<char>((hi << 4) | lo));
                lpc += 3;
                continue;
            }
        }
        retval.push_back(str[lpc]);
    }

    return retval;
}
×
418

419
/**
 * Find the first occurrence of a NUL-terminated needle in a bounded
 * haystack.
 *
 * The haystack is the first `slen` bytes of `s`, cut short at the first NUL
 * byte if one occurs earlier.
 *
 * @param s The start of the haystack.
 * @param find The NUL-terminated string to look for.
 * @param slen The maximum number of bytes of `s` to examine.
 * @return A pointer to the start of the match, `s` itself when `find` is
 * empty, or std::nullopt when there is no match.
 */
std::optional<const char*>
lnav_strnstr(const char* s, const char* find, size_t slen)
{
    const size_t find_len = strlen(find);

    if (find_len == 0) {
        return s;
    }
    if (slen == 0) {
        return std::nullopt;
    }

    // An embedded NUL ends the haystack early.
    const auto* nul = static_cast<const char*>(memchr(s, '\0', slen));
    const char* const s_end = (nul != nullptr) ? nul : s + slen;
    const char* const match = std::search(s, s_end, find, find + find_len);

    if (match == s_end) {
        return std::nullopt;
    }
    return match;
}
443

444
struct separated_string {
445
    const char* ss_str;
446
    size_t ss_len;
447
    const char* ss_separator;
448
    size_t ss_separator_len;
449

450
    separated_string(const char* str, size_t len)
35,021✔
451
        : ss_str(str), ss_len(len), ss_separator(","),
35,021✔
452
          ss_separator_len(strlen(this->ss_separator))
35,021✔
453
    {
454
    }
35,021✔
455

456
    separated_string& with_separator(const char* sep)
35,021✔
457
    {
458
        this->ss_separator = sep;
35,021✔
459
        this->ss_separator_len = strlen(sep);
35,021✔
460
        return *this;
35,021✔
461
    }
462

463
    struct iterator {
464
        const separated_string& i_parent;
465
        const char* i_pos;
466
        const char* i_next_pos;
467
        size_t i_index;
468

469
        iterator(const separated_string& ss, const char* pos)
850,316✔
470
            : i_parent(ss), i_pos(pos), i_next_pos(pos), i_index(0)
850,316✔
471
        {
472
            this->update();
850,316✔
473
        }
850,316✔
474

475
        void update()
1,630,758✔
476
        {
477
            const separated_string& ss = this->i_parent;
1,630,758✔
478
            auto next_field
479
                = lnav_strnstr(this->i_pos,
1,630,758✔
480
                               ss.ss_separator,
1,630,758✔
481
                               ss.ss_len - (this->i_pos - ss.ss_str));
1,630,758✔
482
            if (next_field) {
1,630,758✔
483
                this->i_next_pos = next_field.value() + ss.ss_separator_len;
745,799✔
484
            } else {
485
                this->i_next_pos = ss.ss_str + ss.ss_len;
884,959✔
486
            }
487
        }
1,630,758✔
488

489
        iterator& operator++()
780,442✔
490
        {
491
            this->i_pos = this->i_next_pos;
780,442✔
492
            this->update();
780,442✔
493
            this->i_index += 1;
780,442✔
494

495
            return *this;
780,442✔
496
        }
497

498
        string_fragment operator*()
690,840✔
499
        {
500
            const auto& ss = this->i_parent;
690,840✔
501
            int end;
502

503
            if (this->i_next_pos < (ss.ss_str + ss.ss_len)) {
690,840✔
504
                end = this->i_next_pos - ss.ss_str - ss.ss_separator_len;
660,905✔
505
            } else {
506
                end = this->i_next_pos - ss.ss_str;
29,935✔
507
            }
508
            return string_fragment::from_byte_range(
690,840✔
509
                ss.ss_str, this->i_pos - ss.ss_str, end);
690,840✔
510
        }
511

512
        bool operator==(const iterator& other) const
815,295✔
513
        {
514
            return (&this->i_parent == &other.i_parent)
815,295✔
515
                && (this->i_pos == other.i_pos);
815,295✔
516
        }
517

518
        bool operator!=(const iterator& other) const
815,127✔
519
        {
520
            return !(*this == other);
815,127✔
521
        }
522

523
        size_t index() const { return this->i_index; }
1,697,321✔
524
    };
525

526
    iterator begin() { return {*this, this->ss_str}; }
35,021✔
527

528
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
815,295✔
529
};
530

531
class bro_log_format : public log_format {
532
public:
533
    static const intern_string_t TS;
534
    static const intern_string_t DURATION;
535
    struct field_def {
536
        logline_value_meta fd_meta;
537
        logline_value_meta* fd_root_meta;
538
        std::string fd_collator;
539
        std::optional<size_t> fd_numeric_index;
540

541
        explicit field_def(const intern_string_t name,
680✔
542
                           size_t col,
543
                           log_format* format)
544
            : fd_meta(name,
1,360✔
545
                      value_kind_t::VALUE_TEXT,
546
                      logline_value_meta::table_column{col},
680✔
547
                      format),
548
              fd_root_meta(&FIELD_META.find(name)->second)
680✔
549
        {
550
        }
680✔
551

552
        field_def& with_kind(value_kind_t kind,
500✔
553
                             bool identifier = false,
554
                             bool foreign_key = false,
555
                             const std::string& collator = "")
556
        {
557
            this->fd_meta.lvm_kind = kind;
500✔
558
            this->fd_meta.lvm_identifier = identifier;
500✔
559
            this->fd_meta.lvm_foreign_key = foreign_key;
500✔
560
            this->fd_collator = collator;
500✔
561
            return *this;
500✔
562
        }
563

564
        field_def& with_numeric_index(size_t index)
126✔
565
        {
566
            this->fd_numeric_index = index;
126✔
567
            return *this;
126✔
568
        }
569
    };
570

571
    static std::unordered_map<const intern_string_t, logline_value_meta>
572
        FIELD_META;
573

574
    static const intern_string_t get_opid_desc()
752✔
575
    {
576
        static const intern_string_t RETVAL = intern_string::lookup("std");
2,256✔
577

578
        return RETVAL;
752✔
579
    }
580

581
    bro_log_format()
752✔
582
    {
752✔
583
        this->lf_structured = true;
752✔
584
        this->lf_is_self_describing = true;
752✔
585
        this->lf_time_ordered = false;
752✔
586
        this->lf_timestamp_point_of_reference
587
            = timestamp_point_of_reference_t::start;
752✔
588

589
        auto desc_v = std::make_shared<std::vector<opid_descriptor>>();
752✔
590
        desc_v->emplace({});
752✔
591
        auto emplace_res = this->lf_opid_description_def->emplace(
1,504✔
592
            get_opid_desc(), opid_descriptors{desc_v, 0});
1,504✔
593
        this->lf_opid_description_def_vec->emplace_back(
752✔
594
            &emplace_res.first->second);
752✔
595
    }
752✔
596

597
    const intern_string_t get_name() const override
116,407✔
598
    {
599
        static const intern_string_t name(intern_string::lookup("bro"));
117,899✔
600

601
        return this->blf_format_name.empty() ? name : this->blf_format_name;
116,407✔
602
    }
603

604
    void clear() override
12,554✔
605
    {
606
        this->log_format::clear();
12,554✔
607
        this->blf_format_name.clear();
12,554✔
608
        this->blf_field_defs.clear();
12,554✔
609
    }
12,554✔
610

611
    std::vector<logline_value_meta> get_value_metadata() const override
×
612
    {
613
        std::vector<logline_value_meta> retval;
×
614

615
        for (const auto& fd : this->blf_field_defs) {
×
616
            retval.emplace_back(fd.fd_meta);
×
617
        }
618
        return retval;
×
619
    }
×
620

621
    scan_result_t scan_int(std::vector<logline>& dst,
4,857✔
622
                           const line_info& li,
623
                           shared_buffer_ref& sbr,
624
                           scan_batch_context& sbc)
625
    {
626
        static const intern_string_t STATUS_CODE
627
            = intern_string::lookup("bro_status_code");
4,903✔
628
        static const intern_string_t UID = intern_string::lookup("bro_uid");
4,903✔
629
        static const intern_string_t ID_ORIG_H
630
            = intern_string::lookup("bro_id_orig_h");
4,903✔
631

632
        separated_string ss(sbr.get_data(), sbr.length());
4,857✔
633
        timeval tv;
634
        exttm tm;
4,857✔
635
        auto found_ts = false;
4,857✔
636
        log_level_t level = LEVEL_INFO;
4,857✔
637
        uint16_t opid = 0;
4,857✔
638
        auto opid_cap = string_fragment::invalid();
4,857✔
639
        auto host_cap = string_fragment::invalid();
4,857✔
640
        auto duration = std::chrono::microseconds{0};
4,857✔
641

642
        sbc.sbc_value_stats.resize(this->blf_field_defs.size());
4,857✔
643
        ss.with_separator(this->blf_separator.get());
4,857✔
644

645
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
143,439✔
646
            if (iter.index() == 0 && *iter == "#close") {
138,608✔
647
                return scan_match{2000};
26✔
648
            }
649

650
            if (iter.index() >= this->blf_field_defs.size()) {
138,582✔
651
                break;
×
652
            }
653

654
            const auto& fd = this->blf_field_defs[iter.index()];
138,582✔
655

656
            if (TS == fd.fd_meta.lvm_name) {
138,582✔
657
                static const char* const TIME_FMT[] = {"%s.%f"};
658
                const auto sf = *iter;
4,830✔
659

660
                if (this->lf_date_time.scan(
4,830✔
661
                        sf.data(), sf.length(), TIME_FMT, &tm, tv))
4,830✔
662
                {
663
                    this->lf_timestamp_flags = tm.et_flags;
4,830✔
664
                    found_ts = true;
4,830✔
665
                }
666
            } else if (STATUS_CODE == fd.fd_meta.lvm_name) {
133,752✔
667
                const auto sf = *iter;
4,644✔
668

669
                if (!sf.empty() && sf[0] >= '4') {
4,644✔
670
                    level = LEVEL_ERROR;
23✔
671
                }
672
            } else if (UID == fd.fd_meta.lvm_name) {
129,108✔
673
                opid_cap = *iter;
4,830✔
674

675
                opid = opid_cap.hash();
4,830✔
676
            } else if (ID_ORIG_H == fd.fd_meta.lvm_name) {
124,278✔
677
                host_cap = *iter;
4,830✔
678
            } else if (DURATION == fd.fd_meta.lvm_name) {
119,448✔
679
                const auto sf = *iter;
186✔
680
                auto scan_res = scn::scan<double>("{}", sf.to_string_view());
186✔
681
                if (scan_res) {
186✔
682
                    duration = std::chrono::microseconds{
×
683
                        static_cast<long long>(scan_res->value() * 1000000)};
684
                }
685
            }
686

687
            if (fd.fd_numeric_index) {
138,582✔
688
                switch (fd.fd_meta.lvm_kind) {
24,708✔
689
                    case value_kind_t::VALUE_INTEGER:
24,708✔
690
                    case value_kind_t::VALUE_FLOAT: {
691
                        const auto sv = (*iter).to_string_view();
24,708✔
692
                        auto scan_float_res = scn::scan_value<double>(sv);
24,708✔
693
                        if (scan_float_res) {
24,708✔
694
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
20,064✔
695
                                .add_value(scan_float_res->value());
20,064✔
696
                        }
697
                        break;
24,708✔
698
                    }
699
                    default:
×
700
                        break;
×
701
                }
702
            }
703
        }
704

705
        if (found_ts) {
4,831✔
706
            if (!this->lf_specialized) {
4,830✔
707
                for (auto& ll : dst) {
216✔
708
                    ll.set_ignore(true);
192✔
709
                }
710
            }
711

712
            auto log_us = to_us(tv);
4,830✔
713
            if (opid_cap.is_valid()) {
4,830✔
714
                auto opid_iter = sbc.sbc_opids.insert_op(
4,830✔
715
                    sbc.sbc_allocator,
716
                    opid_cap,
717
                    log_us,
718
                    this->lf_timestamp_point_of_reference,
719
                    duration);
720
                opid_iter->second.otr_level_stats.update_msg_count(level);
4,830✔
721

722
                auto& otr = opid_iter->second;
4,830✔
723
                if (!otr.otr_description.lod_index && host_cap.is_valid()
7,039✔
724
                    && otr.otr_description.lod_elements.empty())
7,039✔
725
                {
726
                    otr.otr_description.lod_index = 0;
2,209✔
727
                    otr.otr_description.lod_elements.insert(
4,418✔
728
                        0, host_cap.to_string());
2,209✔
729
                }
730
            }
731
            dst.emplace_back(li.li_file_range.fr_offset, log_us, level, opid);
4,830✔
732
            dst.back().set_opid(opid);
4,830✔
733
            return scan_match{2000};
4,830✔
734
        }
735
        return scan_no_match{"no header found"};
1✔
736
    }
737

738
    scan_result_t scan(logfile& lf,
12,624✔
739
                       std::vector<logline>& dst,
740
                       const line_info& li,
741
                       shared_buffer_ref& sbr,
742
                       scan_batch_context& sbc) override
743
    {
744
        static const auto SEP_RE
745
            = lnav::pcre2pp::code::from_const(R"(^#separator\s+(.+))");
12,624✔
746

747
        if (dst.empty()) {
12,624✔
748
            auto file_options = lf.get_file_options();
1,236✔
749

750
            if (file_options) {
1,236✔
751
                this->lf_date_time.dts_default_zone
752
                    = file_options->second.fo_default_zone.pp_value;
57✔
753
            } else {
754
                this->lf_date_time.dts_default_zone = nullptr;
1,179✔
755
            }
756
        }
1,236✔
757

758
        if (!this->blf_format_name.empty()) {
12,624✔
759
            return this->scan_int(dst, li, sbr, sbc);
4,833✔
760
        }
761

762
        if (dst.empty() || dst.size() > 20 || sbr.empty()
14,441✔
763
            || sbr.get_data()[0] == '#')
14,441✔
764
        {
765
            return scan_no_match{"no header found"};
5,193✔
766
        }
767

768
        auto line_iter = dst.begin();
2,598✔
769
        auto read_result = lf.read_line(line_iter);
2,598✔
770

771
        if (read_result.isErr()) {
2,598✔
772
            return scan_no_match{"unable to read first line"};
×
773
        }
774

775
        auto line = read_result.unwrap();
2,598✔
776
        auto md = SEP_RE.create_match_data();
2,598✔
777

778
        auto match_res = SEP_RE.capture_from(line.to_string_fragment())
2,598✔
779
                             .into(md)
2,598✔
780
                             .matches(PCRE2_NO_UTF_CHECK)
5,196✔
781
                             .ignore_error();
2,598✔
782
        if (!match_res) {
2,598✔
783
            return scan_no_match{"cannot read separator header"};
2,574✔
784
        }
785

786
        this->clear();
24✔
787

788
        auto sep = from_escaped_string(md[1]->data(), md[1]->length());
24✔
789
        this->blf_separator = intern_string::lookup(sep);
24✔
790

791
        for (++line_iter; line_iter != dst.end(); ++line_iter) {
192✔
792
            auto next_read_result = lf.read_line(line_iter);
168✔
793

794
            if (next_read_result.isErr()) {
168✔
795
                return scan_no_match{"unable to read header line"};
×
796
            }
797

798
            line = next_read_result.unwrap();
168✔
799
            separated_string ss(line.get_data(), line.length());
168✔
800

801
            ss.with_separator(this->blf_separator.get());
168✔
802
            auto iter = ss.begin();
168✔
803

804
            string_fragment directive = *iter;
168✔
805

806
            if (directive.empty() || directive[0] != '#') {
168✔
807
                continue;
×
808
            }
809

810
            ++iter;
168✔
811
            if (iter == ss.end()) {
168✔
812
                continue;
×
813
            }
814

815
            if (directive == "#set_separator") {
168✔
816
                this->blf_set_separator = intern_string::lookup(*iter);
24✔
817
            } else if (directive == "#empty_field") {
144✔
818
                this->blf_empty_field = intern_string::lookup(*iter);
24✔
819
            } else if (directive == "#unset_field") {
120✔
820
                this->blf_unset_field = intern_string::lookup(*iter);
24✔
821
            } else if (directive == "#path") {
96✔
822
                auto full_name = fmt::format(FMT_STRING("bro_{}_log"), *iter);
72✔
823
                this->blf_format_name = intern_string::lookup(full_name);
24✔
824
            } else if (directive == "#fields" && this->blf_field_defs.empty()) {
96✔
825
                do {
826
                    auto field_name
827
                        = intern_string::lookup("bro_" + sql_safe_ident(*iter));
680✔
828
                    auto common_iter = FIELD_META.find(field_name);
680✔
829
                    if (common_iter == FIELD_META.end()) {
680✔
830
                        FIELD_META.emplace(field_name,
674✔
831
                                           logline_value_meta{
1,348✔
832
                                               field_name,
833
                                               value_kind_t::VALUE_TEXT,
834
                                           });
835
                    }
836
                    this->blf_field_defs.emplace_back(
1,360✔
837
                        field_name, this->blf_field_defs.size(), this);
680✔
838
                    ++iter;
680✔
839
                } while (iter != ss.end());
680✔
840
            } else if (directive == "#types") {
48✔
841
                static const char* KNOWN_IDS[] = {
842
                    "bro_conn_uids",
843
                    "bro_fuid",
844
                    "bro_host",
845
                    "bro_info_code",
846
                    "bro_method",
847
                    "bro_mime_type",
848
                    "bro_orig_fuids",
849
                    "bro_parent_fuid",
850
                    "bro_proto",
851
                    "bro_referrer",
852
                    "bro_resp_fuids",
853
                    "bro_service",
854
                    "bro_uid",
855
                    "bro_uri",
856
                    "bro_user_agent",
857
                    "bro_username",
858
                };
859
                static const char* KNOWN_FOREIGN[] = {
860
                    "bro_status_code",
861
                };
862

863
                int numeric_count = 0;
24✔
864

865
                do {
866
                    string_fragment field_type = *iter;
680✔
867
                    auto& fd = this->blf_field_defs[iter.index() - 1];
680✔
868

869
                    if (field_type == "time") {
680✔
870
                        fd.with_kind(value_kind_t::VALUE_TIMESTAMP);
48✔
871
                    } else if (field_type == "string") {
656✔
872
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
500✔
873
                                                        std::end(KNOWN_IDS),
874
                                                        fd.fd_meta.lvm_name);
250✔
875
                        fd.with_kind(value_kind_t::VALUE_TEXT, ident);
500✔
876
                    } else if (field_type == "count") {
406✔
877
                        bool ident = std::binary_search(std::begin(KNOWN_IDS),
248✔
878
                                                        std::end(KNOWN_IDS),
879
                                                        fd.fd_meta.lvm_name);
124✔
880
                        bool foreign
881
                            = std::binary_search(std::begin(KNOWN_FOREIGN),
248✔
882
                                                 std::end(KNOWN_FOREIGN),
883
                                                 fd.fd_meta.lvm_name);
124✔
884
                        fd.with_kind(
248✔
885
                              value_kind_t::VALUE_INTEGER, ident, foreign)
886
                            .with_numeric_index(numeric_count);
124✔
887
                        numeric_count += 1;
124✔
888
                    } else if (field_type == "bool") {
282✔
889
                        fd.with_kind(value_kind_t::VALUE_BOOLEAN);
8✔
890
                    } else if (field_type == "addr") {
278✔
891
                        fd.with_kind(
96✔
892
                            value_kind_t::VALUE_TEXT, true, false, "ipaddress");
893
                    } else if (field_type == "port") {
230✔
894
                        fd.with_kind(value_kind_t::VALUE_INTEGER, true);
96✔
895
                    } else if (field_type == "interval") {
182✔
896
                        fd.with_kind(value_kind_t::VALUE_FLOAT)
4✔
897
                            .with_numeric_index(numeric_count);
2✔
898
                        numeric_count += 1;
2✔
899
                    }
900

901
                    ++iter;
680✔
902
                } while (iter != ss.end());
680✔
903
            }
904
        }
168✔
905

906
        if (!this->blf_format_name.empty() && !this->blf_separator.empty()
48✔
907
            && !this->blf_field_defs.empty())
48✔
908
        {
909
            return this->scan_int(dst, li, sbr, sbc);
24✔
910
        }
911

912
        this->blf_format_name.clear();
×
913

914
        return scan_no_match{"no header found"};
×
915
    }
2,598✔
916

917
    void annotate(logfile* lf,
29,996✔
918
                  uint64_t line_number,
919
                  string_attrs_t& sa,
920
                  logline_value_vector& values) const override
921
    {
922
        static const intern_string_t UID = intern_string::lookup("bro_uid");
30,036✔
923

924
        auto& sbr = values.lvv_sbr;
29,996✔
925
        separated_string ss(sbr.get_data(), sbr.length());
29,996✔
926

927
        ss.with_separator(this->blf_separator.get());
29,996✔
928

929
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
670,328✔
930
            if (iter.index() >= this->blf_field_defs.size()) {
640,537✔
931
                return;
205✔
932
            }
933

934
            const field_def& fd = this->blf_field_defs[iter.index()];
640,332✔
935
            string_fragment sf = *iter;
640,332✔
936

937
            if (sf == this->blf_empty_field) {
640,332✔
938
                sf.clear();
29,799✔
939
            } else if (sf == this->blf_unset_field) {
610,533✔
940
                sf.invalidate();
70,910✔
941
            }
942

943
            auto lr = line_range(sf.sf_begin, sf.sf_end);
640,332✔
944

945
            if (fd.fd_meta.lvm_name == TS) {
640,332✔
946
                sa.emplace_back(lr, L_TIMESTAMP.value());
29,996✔
947
            } else if (fd.fd_meta.lvm_name == UID) {
610,336✔
948
                sa.emplace_back(lr, L_OPID.value());
29,996✔
949
                values.lvv_opid_value = sf.to_string();
29,996✔
950
                values.lvv_opid_provenance
951
                    = logline_value_vector::opid_provenance::file;
29,996✔
952
            }
953

954
            if (lr.is_valid()) {
640,332✔
955
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
569,422✔
956
            } else {
957
                values.lvv_values.emplace_back(fd.fd_meta);
70,910✔
958
            }
959
            values.lvv_values.back().lv_meta.lvm_user_hidden
640,332✔
960
                = fd.fd_root_meta->lvm_user_hidden;
640,332✔
961
        }
962

963
        log_format::annotate(lf, line_number, sa, values);
29,791✔
964
    }
965

966
    std::optional<size_t> stats_index_for_value(
35✔
967
        const intern_string_t& name) const override
968
    {
969
        for (const auto& blf_field_def : this->blf_field_defs) {
525✔
970
            if (blf_field_def.fd_meta.lvm_name == name) {
525✔
971
                if (!blf_field_def.fd_numeric_index) {
35✔
972
                    break;
×
973
                }
974
                return blf_field_def.fd_numeric_index.value();
35✔
975
            }
976
        }
977

978
        return std::nullopt;
×
979
    }
980

981
    /**
     * Set the user-hidden flag for a field in the shared FIELD_META table.
     *
     * @param field_name The field to change; LOG_TIME_STR is mapped to the
     * timestamp field (TS).
     * @param val The new value of the hidden flag.
     * @return True if the field is known and was updated, false otherwise.
     */
    bool hide_field(intern_string_t field_name, bool val) override
    {
        const intern_string_t lookup_name
            = (field_name == LOG_TIME_STR) ? TS : field_name;
        auto meta_iter = FIELD_META.find(lookup_name);

        if (meta_iter != FIELD_META.end()) {
            meta_iter->second.lvm_user_hidden = val;
            return true;
        }

        return false;
    }
996

997
    std::map<intern_string_t, logline_value_meta> get_field_states() override
52✔
998
    {
999
        std::map<intern_string_t, logline_value_meta> retval;
52✔
1000

1001
        for (const auto& fd : FIELD_META) {
168✔
1002
            retval.emplace(fd.first, fd.second);
116✔
1003
        }
1004

1005
        return retval;
52✔
1006
    }
×
1007

1008
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
24✔
1009
    {
1010
        auto retval = std::make_shared<bro_log_format>(*this);
24✔
1011

1012
        retval->lf_specialized = true;
24✔
1013
        return retval;
48✔
1014
    }
24✔
1015

1016
    class bro_log_table : public log_format_vtab_impl {
1017
    public:
1018
        explicit bro_log_table(std::shared_ptr<const log_format> format)
22✔
1019
            : log_format_vtab_impl(format),
22✔
1020
              blt_format(dynamic_cast<const bro_log_format*>(format.get()))
22✔
1021
        {
1022
        }
22✔
1023

1024
        void get_columns(std::vector<vtab_column>& cols) const override
31✔
1025
        {
1026
            for (const auto& fd : this->blt_format->blf_field_defs) {
914✔
1027
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
883✔
1028
                    fd.fd_meta.lvm_kind);
883✔
1029

1030
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
883✔
1031
                                  type_pair.first,
1032
                                  fd.fd_collator,
883✔
1033
                                  false,
1,766✔
1034
                                  "",
1035
                                  type_pair.second);
1036
            }
1037
        }
31✔
1038

1039
        void get_foreign_keys(
11✔
1040
            std::unordered_set<std::string>& keys_inout) const override
1041
        {
1042
            this->log_vtab_impl::get_foreign_keys(keys_inout);
11✔
1043

1044
            for (const auto& fd : this->blt_format->blf_field_defs) {
322✔
1045
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
311✔
1046
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
136✔
1047
                }
1048
            }
1049
        }
11✔
1050

1051
        const bro_log_format* blt_format;
1052
    };
1053

1054
    static std::map<intern_string_t, std::shared_ptr<bro_log_table>>&
1055
    get_tables()
22✔
1056
    {
1057
        static std::map<intern_string_t, std::shared_ptr<bro_log_table>> retval;
22✔
1058

1059
        return retval;
22✔
1060
    }
1061

1062
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
654✔
1063
    {
1064
        if (this->blf_format_name.empty()) {
654✔
1065
            return nullptr;
632✔
1066
        }
1067

1068
        std::shared_ptr<bro_log_table> retval = nullptr;
22✔
1069

1070
        auto& tables = get_tables();
22✔
1071
        const auto iter = tables.find(this->blf_format_name);
22✔
1072
        if (iter == tables.end()) {
22✔
1073
            retval = std::make_shared<bro_log_table>(this->shared_from_this());
22✔
1074
            tables[this->blf_format_name] = retval;
22✔
1075
        }
1076

1077
        return retval;
22✔
1078
    }
22✔
1079

1080
    void get_subline(const log_format_file_state& lffs,
34,580✔
1081
                     const logline& ll,
1082
                     shared_buffer_ref& sbr,
1083
                     subline_options opts) override
1084
    {
1085
    }
34,580✔
1086

1087
    intern_string_t blf_format_name;
1088
    intern_string_t blf_separator;
1089
    intern_string_t blf_set_separator;
1090
    intern_string_t blf_empty_field;
1091
    intern_string_t blf_unset_field;
1092
    std::vector<field_def> blf_field_defs;
1093
};
1094

1095
std::unordered_map<const intern_string_t, logline_value_meta>
1096
    bro_log_format::FIELD_META;
1097

1098
const intern_string_t bro_log_format::TS = intern_string::lookup("bro_ts");
1099
const intern_string_t bro_log_format::DURATION
1100
    = intern_string::lookup("bro_duration");
1101

1102
struct ws_separated_string {
1103
    const char* ss_str;
1104
    size_t ss_len;
1105

1106
    explicit ws_separated_string(const char* str = nullptr, size_t len = -1)
22,909✔
1107
        : ss_str(str), ss_len(len)
22,909✔
1108
    {
1109
    }
22,909✔
1110

1111
    struct iterator {
1112
        enum class state_t {
1113
            NORMAL,
1114
            QUOTED,
1115
        };
1116

1117
        const ws_separated_string& i_parent;
1118
        const char* i_pos;
1119
        const char* i_next_pos;
1120
        size_t i_index{0};
1121
        state_t i_state{state_t::NORMAL};
1122

1123
        iterator(const ws_separated_string& ss, const char* pos)
71,767✔
1124
            : i_parent(ss), i_pos(pos), i_next_pos(pos)
71,767✔
1125
        {
1126
            this->update();
71,767✔
1127
        }
71,767✔
1128

1129
        void update()
117,844✔
1130
        {
1131
            const auto& ss = this->i_parent;
117,844✔
1132
            bool done = false;
117,844✔
1133

1134
            while (!done && this->i_next_pos < (ss.ss_str + ss.ss_len)) {
864,073✔
1135
                switch (this->i_state) {
746,229✔
1136
                    case state_t::NORMAL:
739,471✔
1137
                        if (*this->i_next_pos == '"') {
739,471✔
1138
                            this->i_state = state_t::QUOTED;
255✔
1139
                        } else if (isspace(*this->i_next_pos)) {
739,216✔
1140
                            done = true;
60,280✔
1141
                        }
1142
                        break;
739,471✔
1143
                    case state_t::QUOTED:
6,758✔
1144
                        if (*this->i_next_pos == '"') {
6,758✔
1145
                            this->i_state = state_t::NORMAL;
255✔
1146
                        }
1147
                        break;
6,758✔
1148
                }
1149
                if (!done) {
746,229✔
1150
                    this->i_next_pos += 1;
685,949✔
1151
                }
1152
            }
1153
        }
117,844✔
1154

1155
        iterator& operator++()
46,077✔
1156
        {
1157
            const auto& ss = this->i_parent;
46,077✔
1158

1159
            this->i_pos = this->i_next_pos;
46,077✔
1160
            while (this->i_pos < (ss.ss_str + ss.ss_len)
46,077✔
1161
                   && isspace(*this->i_pos))
89,535✔
1162
            {
1163
                this->i_pos += 1;
43,458✔
1164
                this->i_next_pos += 1;
43,458✔
1165
            }
1166
            this->update();
46,077✔
1167
            this->i_index += 1;
46,077✔
1168

1169
            return *this;
46,077✔
1170
        }
1171

1172
        string_fragment operator*()
63,621✔
1173
        {
1174
            const auto& ss = this->i_parent;
63,621✔
1175
            int end = this->i_next_pos - ss.ss_str;
63,621✔
1176

1177
            return string_fragment(ss.ss_str, this->i_pos - ss.ss_str, end);
63,621✔
1178
        }
1179

1180
        bool operator==(const iterator& other) const
48,858✔
1181
        {
1182
            return (&this->i_parent == &other.i_parent)
48,858✔
1183
                && (this->i_pos == other.i_pos);
48,858✔
1184
        }
1185

1186
        bool operator!=(const iterator& other) const
46,065✔
1187
        {
1188
            return !(*this == other);
46,065✔
1189
        }
1190

1191
        size_t index() const { return this->i_index; }
86,567✔
1192
    };
1193

1194
    iterator begin() { return {*this, this->ss_str}; }
22,909✔
1195

1196
    iterator end() { return {*this, this->ss_str + this->ss_len}; }
48,858✔
1197
};
1198

1199
class w3c_log_format : public log_format {
1200
public:
1201
    static const intern_string_t F_DATE;
1202
    static const intern_string_t F_TIME;
1203

1204
    struct field_def {
1205
        const intern_string_t fd_name;
1206
        logline_value_meta fd_meta;
1207
        logline_value_meta* fd_root_meta{nullptr};
1208
        std::string fd_collator;
1209
        std::optional<size_t> fd_numeric_index;
1210

1211
        explicit field_def(const intern_string_t name)
22✔
1212
            : fd_name(name), fd_meta(intern_string::lookup(sql_safe_ident(
44✔
1213
                                         name.to_string_fragment())),
44✔
1214
                                     value_kind_t::VALUE_TEXT)
22✔
1215
        {
1216
        }
22✔
1217

1218
        field_def(const intern_string_t name, logline_value_meta meta)
71✔
1219
            : fd_name(name), fd_meta(meta)
71✔
1220
        {
1221
        }
71✔
1222

1223
        field_def(size_t col,
9,792✔
1224
                  const char* name,
1225
                  value_kind_t kind,
1226
                  bool ident = false,
1227
                  bool foreign_key = false,
1228
                  std::string coll = "")
1229
            : fd_name(intern_string::lookup(name)),
19,584✔
1230
              fd_meta(
19,584✔
1231
                  intern_string::lookup(sql_safe_ident(string_fragment(name))),
19,584✔
1232
                  kind,
1233
                  logline_value_meta::table_column{col}),
9,792✔
1234
              fd_collator(std::move(coll))
9,792✔
1235
        {
1236
            this->fd_meta.lvm_identifier = ident;
9,792✔
1237
            this->fd_meta.lvm_foreign_key = foreign_key;
9,792✔
1238
        }
9,792✔
1239

1240
        field_def& with_kind(value_kind_t kind,
1241
                             bool identifier = false,
1242
                             const std::string& collator = "")
1243
        {
1244
            this->fd_meta.lvm_kind = kind;
1245
            this->fd_meta.lvm_identifier = identifier;
1246
            this->fd_collator = collator;
1247
            return *this;
1248
        }
1249

1250
        field_def& with_numeric_index(int index)
51✔
1251
        {
1252
            this->fd_numeric_index = index;
51✔
1253
            return *this;
51✔
1254
        }
1255
    };
1256

1257
    static std::unordered_map<const intern_string_t, logline_value_meta>
1258
        FIELD_META;
1259

1260
    struct field_to_struct_t {
1261
        field_to_struct_t(const char* prefix, const char* struct_name)
2,448✔
1262
            : fs_prefix(prefix),
2,448✔
1263
              fs_struct_name(intern_string::lookup(struct_name))
4,896✔
1264
        {
1265
        }
2,448✔
1266

1267
        const char* fs_prefix;
1268
        intern_string_t fs_struct_name;
1269
    };
1270

1271
    static const std::array<field_def, 16>& get_known_fields()
629✔
1272
    {
1273
        static size_t KNOWN_FIELD_INDEX = 0;
1274
        static const std::array<field_def, 16> RETVAL = {
1275
            field_def{
1276
                KNOWN_FIELD_INDEX++,
1277
                "cs-method",
1278
                value_kind_t::VALUE_TEXT,
1279
                true,
1280
            },
1281
            {
1282
                KNOWN_FIELD_INDEX++,
1283
                "c-ip",
1284
                value_kind_t::VALUE_TEXT,
1285
                true,
1286
                false,
1287
                "ipaddress",
1288
            },
1289
            {
1290
                KNOWN_FIELD_INDEX++,
1291
                "cs-bytes",
1292
                value_kind_t::VALUE_INTEGER,
1293
                false,
1294
            },
1295
            {
1296
                KNOWN_FIELD_INDEX++,
1297
                "cs-host",
1298
                value_kind_t::VALUE_TEXT,
1299
                true,
1300
            },
1301
            {
1302
                KNOWN_FIELD_INDEX++,
1303
                "cs-uri-stem",
1304
                value_kind_t::VALUE_TEXT,
1305
                true,
1306
                false,
1307
                "naturalnocase",
1308
            },
1309
            {
1310
                KNOWN_FIELD_INDEX++,
1311
                "cs-uri-query",
1312
                value_kind_t::VALUE_TEXT,
1313
                false,
1314
            },
1315
            {
1316
                KNOWN_FIELD_INDEX++,
1317
                "cs-username",
1318
                value_kind_t::VALUE_TEXT,
1319
                false,
1320
            },
1321
            {
1322
                KNOWN_FIELD_INDEX++,
1323
                "cs-version",
1324
                value_kind_t::VALUE_TEXT,
1325
                true,
1326
            },
1327
            {
1328
                KNOWN_FIELD_INDEX++,
1329
                "s-ip",
1330
                value_kind_t::VALUE_TEXT,
1331
                true,
1332
                false,
1333
                "ipaddress",
1334
            },
1335
            {
1336
                KNOWN_FIELD_INDEX++,
1337
                "s-port",
1338
                value_kind_t::VALUE_INTEGER,
1339
                true,
1340
            },
1341
            {
1342
                KNOWN_FIELD_INDEX++,
1343
                "s-computername",
1344
                value_kind_t::VALUE_TEXT,
1345
                true,
1346
            },
1347
            {
1348
                KNOWN_FIELD_INDEX++,
1349
                "s-sitename",
1350
                value_kind_t::VALUE_TEXT,
1351
                true,
1352
            },
1353
            {
1354
                KNOWN_FIELD_INDEX++,
1355
                "sc-bytes",
1356
                value_kind_t::VALUE_INTEGER,
1357
                false,
1358
            },
1359
            {
1360
                KNOWN_FIELD_INDEX++,
1361
                "sc-status",
1362
                value_kind_t::VALUE_INTEGER,
1363
                false,
1364
                true,
1365
            },
1366
            {
1367
                KNOWN_FIELD_INDEX++,
1368
                "sc-substatus",
1369
                value_kind_t::VALUE_INTEGER,
1370
                false,
1371
            },
1372
            {
1373
                KNOWN_FIELD_INDEX++,
1374
                "time-taken",
1375
                value_kind_t::VALUE_FLOAT,
1376
                false,
1377
            },
1378
        };
1,853✔
1379

1380
        return RETVAL;
629✔
1381
    }
1382

1383
    static const std::array<field_to_struct_t, 4>& get_known_struct_fields()
626✔
1384
    {
1385
        static const std::array<field_to_struct_t, 4> RETVAL = {
1386
            field_to_struct_t{"cs(", "cs_headers"},
1387
            {"sc(", "sc_headers"},
1388
            {"rs(", "rs_headers"},
1389
            {"sr(", "sr_headers"},
1390
        };
626✔
1391

1392
        return RETVAL;
626✔
1393
    }
1394

1395
    w3c_log_format()
752✔
1396
    {
752✔
1397
        this->lf_is_self_describing = true;
752✔
1398
        this->lf_time_ordered = false;
752✔
1399
        this->lf_structured = true;
752✔
1400
    }
752✔
1401

1402
    /**
     * @return The format name taken from the file, or "w3c_log" while no
     * name has been set.
     */
    const intern_string_t get_name() const override
    {
        static const intern_string_t DEFAULT_NAME(
            intern_string::lookup("w3c_log"));

        if (this->wlf_format_name.empty()) {
            return DEFAULT_NAME;
        }

        return this->wlf_format_name;
    }
1408

1409
    void clear() override
15,342✔
1410
    {
1411
        this->log_format::clear();
15,342✔
1412
        this->wlf_time_scanner.clear();
15,342✔
1413
        this->wlf_format_name.clear();
15,342✔
1414
        this->wlf_field_defs.clear();
15,342✔
1415
    }
15,342✔
1416

1417
    std::vector<logline_value_meta> get_value_metadata() const override
×
1418
    {
1419
        std::vector<logline_value_meta> retval;
×
1420

1421
        for (const auto& fd : this->wlf_field_defs) {
×
1422
            retval.emplace_back(fd.fd_meta);
×
1423
        }
1424
        return retval;
×
1425
    }
×
1426

1427
    scan_result_t scan_int(std::vector<logline>& dst,
1,315✔
1428
                           const line_info& li,
1429
                           shared_buffer_ref& sbr,
1430
                           scan_batch_context& sbc)
1431
    {
1432
        static const intern_string_t F_DATE_LOCAL
1433
            = intern_string::lookup("date-local");
1,349✔
1434
        static const intern_string_t F_DATE_UTC
1435
            = intern_string::lookup("date-UTC");
1,349✔
1436
        static const intern_string_t F_TIME_LOCAL
1437
            = intern_string::lookup("time-local");
1,349✔
1438
        static const intern_string_t F_TIME_UTC
1439
            = intern_string::lookup("time-UTC");
1,349✔
1440
        static const intern_string_t F_STATUS_CODE
1441
            = intern_string::lookup("sc-status");
1,349✔
1442

1443
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,315✔
1444
        timeval date_tv{0, 0}, time_tv{0, 0};
1,315✔
1445
        exttm date_tm, time_tm;
1,315✔
1446
        bool found_date = false, found_time = false;
1,315✔
1447
        log_level_t level = LEVEL_INFO;
1,315✔
1448

1449
        sbc.sbc_value_stats.resize(this->wlf_field_defs.size());
1,315✔
1450
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
19,693✔
1451
            if (iter.index() >= this->wlf_field_defs.size()) {
18,588✔
1452
                level = LEVEL_INVALID;
1✔
1453
                break;
1✔
1454
            }
1455

1456
            const auto& fd = this->wlf_field_defs[iter.index()];
18,587✔
1457
            string_fragment sf = *iter;
18,587✔
1458

1459
            if (sf.startswith("#")) {
18,587✔
1460
                if (sf == "#Date:") {
209✔
1461
                    auto sbr_sf_opt
1462
                        = sbr.to_string_fragment().consume_n(sf.length());
53✔
1463

1464
                    if (sbr_sf_opt) {
53✔
1465
                        auto sbr_sf = sbr_sf_opt.value().trim();
53✔
1466
                        date_time_scanner dts;
53✔
1467
                        exttm tm;
53✔
1468
                        timeval tv;
1469

1470
                        if (dts.scan(sbr_sf.data(),
53✔
1471
                                     sbr_sf.length(),
53✔
1472
                                     nullptr,
1473
                                     &tm,
1474
                                     tv))
1475
                        {
1476
                            this->lf_date_time.set_base_time(tv.tv_sec,
52✔
1477
                                                             tm.et_tm);
1478
                            this->wlf_time_scanner.set_base_time(tv.tv_sec,
52✔
1479
                                                                 tm.et_tm);
1480
                        }
1481
                    }
1482
                }
1483
                dst.emplace_back(li.li_file_range.fr_offset,
209✔
1484
                                 std::chrono::microseconds{0},
×
1485
                                 LEVEL_IGNORE);
209✔
1486
                return scan_match{2000};
209✔
1487
            }
1488

1489
            sf = sf.trim("\" \t");
18,378✔
1490
            if (F_DATE == fd.fd_name || F_DATE_LOCAL == fd.fd_name
35,694✔
1491
                || F_DATE_UTC == fd.fd_name)
35,694✔
1492
            {
1493
                if (this->lf_date_time.scan(
1,070✔
1494
                        sf.data(), sf.length(), nullptr, &date_tm, date_tv))
1,070✔
1495
                {
1496
                    this->lf_timestamp_flags |= date_tm.et_flags;
1,069✔
1497
                    found_date = true;
1,069✔
1498
                }
1499
            } else if (F_TIME == fd.fd_name || F_TIME_LOCAL == fd.fd_name
33,523✔
1500
                       || F_TIME_UTC == fd.fd_name)
33,523✔
1501
            {
1502
                if (this->wlf_time_scanner.scan(
1,101✔
1503
                        sf.data(), sf.length(), nullptr, &time_tm, time_tv))
1,101✔
1504
                {
1505
                    this->lf_timestamp_flags |= time_tm.et_flags;
1,101✔
1506
                    found_time = true;
1,101✔
1507
                }
1508
            } else if (F_STATUS_CODE == fd.fd_name) {
16,207✔
1509
                if (!sf.empty() && sf[0] >= '4') {
1,098✔
1510
                    level = LEVEL_ERROR;
1,018✔
1511
                }
1512
            }
1513

1514
            if (fd.fd_numeric_index) {
18,378✔
1515
                switch (fd.fd_meta.lvm_kind) {
6,402✔
1516
                    case value_kind_t::VALUE_INTEGER:
6,402✔
1517
                    case value_kind_t::VALUE_FLOAT: {
1518
                        auto scan_float_res
1519
                            = scn::scan_value<double>(sf.to_string_view());
6,402✔
1520

1521
                        if (scan_float_res) {
6,402✔
1522
                            sbc.sbc_value_stats[fd.fd_numeric_index.value()]
6,398✔
1523
                                .add_value(scan_float_res->value());
6,398✔
1524
                        }
1525
                        break;
6,402✔
1526
                    }
1527
                    default:
×
1528
                        break;
×
1529
                }
1530
            }
1531
        }
1532

1533
        if (found_time) {
1,106✔
1534
            auto tm = time_tm;
1,101✔
1535

1536
            if (found_date) {
1,101✔
1537
                tm.et_tm.tm_year = date_tm.et_tm.tm_year;
1,069✔
1538
                tm.et_tm.tm_mday = date_tm.et_tm.tm_mday;
1,069✔
1539
                tm.et_tm.tm_mon = date_tm.et_tm.tm_mon;
1,069✔
1540
                tm.et_tm.tm_wday = date_tm.et_tm.tm_wday;
1,069✔
1541
                tm.et_tm.tm_yday = date_tm.et_tm.tm_yday;
1,069✔
1542
            }
1543

1544
            auto tv = tm.to_timeval();
1,101✔
1545
            if (!this->lf_specialized) {
1,101✔
1546
                for (auto& ll : dst) {
70✔
1547
                    ll.set_ignore(true);
56✔
1548
                }
1549
            }
1550
            dst.emplace_back(li.li_file_range.fr_offset, tv, level);
1,101✔
1551
            return scan_match{2000};
1,101✔
1552
        }
1553

1554
        return scan_no_match{"no header found"};
5✔
1555
    }
1556

1557
    scan_result_t scan(logfile& lf,
12,550✔
1558
                       std::vector<logline>& dst,
1559
                       const line_info& li,
1560
                       shared_buffer_ref& sbr,
1561
                       scan_batch_context& sbc) override
1562
    {
1563
        static const auto* W3C_LOG_NAME = intern_string::lookup("w3c_log");
13,774✔
1564
        static const auto* X_FIELDS_NAME = intern_string::lookup("x_fields");
13,774✔
1565
        static const auto& KNOWN_FIELDS = get_known_fields();
12,550✔
1566
        static const auto& KNOWN_STRUCT_FIELDS = get_known_struct_fields();
12,550✔
1567
        static auto X_FIELDS_IDX = 0;
1568

1569
        if (li.li_partial) {
12,550✔
1570
            return scan_incomplete{};
19✔
1571
        }
1572

1573
        if (dst.empty()) {
12,531✔
1574
            auto file_options = lf.get_file_options();
1,139✔
1575

1576
            if (file_options) {
1,139✔
1577
                this->lf_date_time.dts_default_zone
1578
                    = file_options->second.fo_default_zone.pp_value;
57✔
1579
            } else {
1580
                this->lf_date_time.dts_default_zone = nullptr;
1,082✔
1581
            }
1582
        }
1,139✔
1583

1584
        if (!this->wlf_format_name.empty()) {
12,531✔
1585
            return this->scan_int(dst, li, sbr, sbc);
1,296✔
1586
        }
1587

1588
        if (dst.empty() || dst.size() > 20 || sbr.empty()
21,331✔
1589
            || sbr.get_data()[0] == '#')
21,331✔
1590
        {
1591
            return scan_no_match{"no header found"};
8,423✔
1592
        }
1593

1594
        this->clear();
2,812✔
1595

1596
        for (auto line_iter = dst.begin(); line_iter != dst.end(); ++line_iter)
22,940✔
1597
        {
1598
            auto next_read_result = lf.read_line(line_iter);
20,128✔
1599

1600
            if (next_read_result.isErr()) {
20,128✔
1601
                return scan_no_match{"unable to read first line"};
×
1602
            }
1603

1604
            auto line = next_read_result.unwrap();
20,128✔
1605
            ws_separated_string ss(line.get_data(), line.length());
20,128✔
1606
            auto iter = ss.begin();
20,128✔
1607
            const auto directive = *iter;
20,128✔
1608

1609
            if (directive.empty() || directive[0] != '#') {
20,128✔
1610
                continue;
17,335✔
1611
            }
1612

1613
            ++iter;
2,793✔
1614
            if (iter == ss.end()) {
2,793✔
1615
                continue;
41✔
1616
            }
1617

1618
            if (directive == "#Date:") {
2,752✔
1619
                date_time_scanner dts;
12✔
1620
                struct exttm tm;
12✔
1621
                struct timeval tv;
1622

1623
                if (dts.scan(line.get_data_at(directive.length() + 1),
12✔
1624
                             line.length() - directive.length() - 1,
12✔
1625
                             nullptr,
1626
                             &tm,
1627
                             tv))
1628
                {
1629
                    this->lf_date_time.set_base_time(tv.tv_sec, tm.et_tm);
11✔
1630
                    this->wlf_time_scanner.set_base_time(tv.tv_sec, tm.et_tm);
11✔
1631
                }
1632
            } else if (directive == "#Fields:" && this->wlf_field_defs.empty())
2,740✔
1633
            {
1634
                int numeric_count = 0;
19✔
1635

1636
                do {
1637
                    auto sf = (*iter).trim(")");
210✔
1638

1639
                    auto field_iter = std::find_if(
630✔
1640
                        begin(KNOWN_FIELDS),
1641
                        end(KNOWN_FIELDS),
1642
                        [&sf](auto elem) { return sf == elem.fd_name; });
2,400✔
1643
                    if (field_iter != end(KNOWN_FIELDS)) {
420✔
1644
                        this->wlf_field_defs.emplace_back(*field_iter);
117✔
1645
                        auto& fd = this->wlf_field_defs.back();
117✔
1646
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
117✔
1647
                        if (common_iter == FIELD_META.end()) {
117✔
1648
                            auto emp_res = FIELD_META.emplace(
116✔
1649
                                fd.fd_meta.lvm_name, fd.fd_meta);
116✔
1650
                            common_iter = emp_res.first;
116✔
1651
                        }
1652
                        fd.fd_root_meta = &common_iter->second;
117✔
1653
                    } else if (sf.is_one_of("date", "time")) {
93✔
1654
                        this->wlf_field_defs.emplace_back(
44✔
1655
                            intern_string::lookup(sf));
22✔
1656
                        auto& fd = this->wlf_field_defs.back();
22✔
1657
                        auto common_iter = FIELD_META.find(fd.fd_meta.lvm_name);
22✔
1658
                        if (common_iter == FIELD_META.end()) {
22✔
1659
                            auto emp_res = FIELD_META.emplace(
21✔
1660
                                fd.fd_meta.lvm_name, fd.fd_meta);
21✔
1661
                            common_iter = emp_res.first;
21✔
1662
                        }
1663
                        fd.fd_root_meta = &common_iter->second;
22✔
1664
                    } else {
1665
                        const auto fs_iter = std::find_if(
213✔
1666
                            begin(KNOWN_STRUCT_FIELDS),
1667
                            end(KNOWN_STRUCT_FIELDS),
1668
                            [&sf](auto elem) {
221✔
1669
                                return sf.startswith(elem.fs_prefix);
221✔
1670
                            });
1671
                        if (fs_iter != end(KNOWN_STRUCT_FIELDS)) {
142✔
1672
                            const intern_string_t field_name
1673
                                = intern_string::lookup(sf.substr(3));
21✔
1674
                            this->wlf_field_defs.emplace_back(
21✔
1675
                                field_name,
1676
                                logline_value_meta(
42✔
1677
                                    field_name,
1678
                                    value_kind_t::VALUE_TEXT,
1679
                                    logline_value_meta::table_column{
×
1680
                                        KNOWN_FIELDS.size() + 1
21✔
1681
                                        + std::distance(
63✔
1682
                                            begin(KNOWN_STRUCT_FIELDS),
1683
                                            fs_iter)},
1684
                                    this)
42✔
1685
                                    .with_struct_name(fs_iter->fs_struct_name));
1686
                        } else {
1687
                            const intern_string_t field_name
1688
                                = intern_string::lookup(sf);
50✔
1689
                            this->wlf_field_defs.emplace_back(
50✔
1690
                                field_name,
1691
                                logline_value_meta(
100✔
1692
                                    field_name,
1693
                                    value_kind_t::VALUE_TEXT,
1694
                                    logline_value_meta::table_column{
×
1695
                                        KNOWN_FIELDS.size() + X_FIELDS_IDX},
100✔
1696
                                    this)
100✔
1697
                                    .with_struct_name(X_FIELDS_NAME));
1698
                        }
1699
                    }
1700
                    auto& fd = this->wlf_field_defs.back();
210✔
1701
                    fd.fd_meta.lvm_format = std::make_optional(this);
210✔
1702
                    switch (fd.fd_meta.lvm_kind) {
210✔
1703
                        case value_kind_t::VALUE_FLOAT:
51✔
1704
                        case value_kind_t::VALUE_INTEGER:
1705
                            fd.with_numeric_index(numeric_count);
51✔
1706
                            numeric_count += 1;
51✔
1707
                            break;
51✔
1708
                        default:
159✔
1709
                            break;
159✔
1710
                    }
1711

1712
                    ++iter;
210✔
1713
                } while (iter != ss.end());
210✔
1714

1715
                this->wlf_format_name = W3C_LOG_NAME;
19✔
1716
            }
1717
        }
37,504✔
1718

1719
        if (!this->wlf_format_name.empty() && !this->wlf_field_defs.empty()) {
2,812✔
1720
            return this->scan_int(dst, li, sbr, sbc);
19✔
1721
        }
1722

1723
        this->wlf_format_name.clear();
2,793✔
1724

1725
        return scan_no_match{"no header found"};
2,793✔
1726
    }
1727

1728
    void annotate(logfile* lf,
1,466✔
1729
                  uint64_t line_number,
1730
                  string_attrs_t& sa,
1731
                  logline_value_vector& values) const override
1732
    {
1733
        auto& sbr = values.lvv_sbr;
1,466✔
1734
        ws_separated_string ss(sbr.get_data(), sbr.length());
1,466✔
1735
        std::optional<line_range> date_lr;
1,466✔
1736
        std::optional<line_range> time_lr;
1,466✔
1737

1738
        for (auto iter = ss.begin(); iter != ss.end(); ++iter) {
26,162✔
1739
            auto sf = *iter;
24,696✔
1740

1741
            if (iter.index() >= this->wlf_field_defs.size()) {
24,696✔
1742
                sa.emplace_back(line_range{sf.sf_begin, -1},
×
1743
                                SA_INVALID.value("extra fields detected"));
×
1744
                return;
×
1745
            }
1746

1747
            const auto& fd = this->wlf_field_defs[iter.index()];
24,696✔
1748

1749
            if (sf == "-") {
24,696✔
1750
                sf.invalidate();
4,300✔
1751
            }
1752

1753
            auto lr = line_range(sf.sf_begin, sf.sf_end);
24,696✔
1754

1755
            if (lr.is_valid()) {
24,696✔
1756
                if (fd.fd_meta.lvm_name == F_DATE) {
20,396✔
1757
                    date_lr = lr;
1,444✔
1758
                } else if (fd.fd_meta.lvm_name == F_TIME) {
18,952✔
1759
                    time_lr = lr;
1,458✔
1760
                }
1761
                values.lvv_values.emplace_back(fd.fd_meta, sbr, lr);
20,396✔
1762
                if (sf.startswith("\"")) {
20,396✔
1763
                    auto& meta = values.lvv_values.back().lv_meta;
28✔
1764

1765
                    if (meta.lvm_kind == value_kind_t::VALUE_TEXT) {
28✔
1766
                        meta.lvm_kind = value_kind_t::VALUE_W3C_QUOTED;
26✔
1767
                    } else {
1768
                        meta.lvm_kind = value_kind_t::VALUE_NULL;
2✔
1769
                    }
1770
                }
1771
            } else {
1772
                values.lvv_values.emplace_back(fd.fd_meta);
4,300✔
1773
            }
1774
            if (fd.fd_root_meta != nullptr) {
24,696✔
1775
                values.lvv_values.back().lv_meta.lvm_user_hidden
20,318✔
1776
                    = fd.fd_root_meta->lvm_user_hidden;
20,318✔
1777
            }
1778
        }
1779
        if (time_lr) {
1,466✔
1780
            auto ts_lr = time_lr.value();
1,458✔
1781
            if (date_lr) {
1,458✔
1782
                if (date_lr->lr_end + 1 == time_lr->lr_start) {
1,443✔
1783
                    ts_lr.lr_start = date_lr->lr_start;
1,442✔
1784
                    ts_lr.lr_end = time_lr->lr_end;
1,442✔
1785
                }
1786
            }
1787

1788
            sa.emplace_back(ts_lr, L_TIMESTAMP.value());
1,458✔
1789
        }
1790
        log_format::annotate(lf, line_number, sa, values);
1,466✔
1791
    }
1792

1793
    std::optional<size_t> stats_index_for_value(
×
1794
        const intern_string_t& name) const override
1795
    {
1796
        for (const auto& wlf_field_def : this->wlf_field_defs) {
×
1797
            if (wlf_field_def.fd_meta.lvm_name == name) {
×
1798
                if (!wlf_field_def.fd_numeric_index) {
×
1799
                    break;
×
1800
                }
1801
                return wlf_field_def.fd_numeric_index.value();
×
1802
            }
1803
        }
1804

1805
        return std::nullopt;
×
1806
    }
1807

1808
    bool hide_field(const intern_string_t field_name, bool val) override
×
1809
    {
1810
        if (field_name == LOG_TIME_STR) {
×
1811
            auto date_iter = FIELD_META.find(F_DATE);
×
1812
            auto time_iter = FIELD_META.find(F_TIME);
×
1813
            if (date_iter == FIELD_META.end() || time_iter == FIELD_META.end())
×
1814
            {
1815
                return false;
×
1816
            }
1817
            date_iter->second.lvm_user_hidden = val;
×
1818
            time_iter->second.lvm_user_hidden = val;
×
1819
            return true;
×
1820
        }
1821

1822
        auto fd_iter = FIELD_META.find(field_name);
×
1823
        if (fd_iter == FIELD_META.end()) {
×
1824
            return false;
×
1825
        }
1826

1827
        fd_iter->second.lvm_user_hidden = val;
×
1828

1829
        return true;
×
1830
    }
1831

1832
    std::map<intern_string_t, logline_value_meta> get_field_states() override
52✔
1833
    {
1834
        std::map<intern_string_t, logline_value_meta> retval;
52✔
1835

1836
        for (const auto& fd : FIELD_META) {
108✔
1837
            retval.emplace(fd.first, fd.second);
56✔
1838
        }
1839

1840
        return retval;
52✔
1841
    }
×
1842

1843
    std::shared_ptr<log_format> specialized(int fmt_lock = -1) override
14✔
1844
    {
1845
        auto retval = std::make_shared<w3c_log_format>(*this);
14✔
1846

1847
        retval->lf_specialized = true;
14✔
1848
        return retval;
28✔
1849
    }
14✔
1850

1851
    class w3c_log_table : public log_format_vtab_impl {
1852
    public:
1853
        explicit w3c_log_table(std::shared_ptr<const log_format> format)
11✔
1854
            : log_format_vtab_impl(format)
11✔
1855
        {
1856
        }
11✔
1857

1858
        void get_columns(std::vector<vtab_column>& cols) const override
14✔
1859
        {
1860
            for (const auto& fd : get_known_fields()) {
238✔
1861
                auto type_pair = log_vtab_impl::logline_value_to_sqlite_type(
224✔
1862
                    fd.fd_meta.lvm_kind);
224✔
1863

1864
                cols.emplace_back(fd.fd_meta.lvm_name.to_string(),
224✔
1865
                                  type_pair.first,
1866
                                  fd.fd_collator,
224✔
1867
                                  false,
448✔
1868
                                  "",
1869
                                  type_pair.second);
1870
            }
1871
            cols.emplace_back("x_fields");
14✔
1872
            cols.back().with_comment(
28✔
1873
                "A JSON-object that contains fields that are not first-class "
1874
                "columns");
1875
            for (const auto& fs : get_known_struct_fields()) {
70✔
1876
                cols.emplace_back(fs.fs_struct_name.to_string());
56✔
1877
            }
1878
        }
14✔
1879

1880
        void get_foreign_keys(
3✔
1881
            std::unordered_set<std::string>& keys_inout) const override
1882
        {
1883
            this->log_vtab_impl::get_foreign_keys(keys_inout);
3✔
1884

1885
            for (const auto& fd : get_known_fields()) {
51✔
1886
                if (fd.fd_meta.lvm_identifier || fd.fd_meta.lvm_foreign_key) {
48✔
1887
                    keys_inout.emplace(fd.fd_meta.lvm_name.to_string());
30✔
1888
                }
1889
            }
1890
        }
3✔
1891
    };
1892

1893
    static std::map<intern_string_t, std::shared_ptr<w3c_log_table>>&
1894
    get_tables()
11✔
1895
    {
1896
        static std::map<intern_string_t, std::shared_ptr<w3c_log_table>> retval;
11✔
1897

1898
        return retval;
11✔
1899
    }
1900

1901
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
643✔
1902
    {
1903
        if (this->wlf_format_name.empty()) {
643✔
1904
            return nullptr;
632✔
1905
        }
1906

1907
        std::shared_ptr<w3c_log_table> retval = nullptr;
11✔
1908

1909
        auto& tables = get_tables();
11✔
1910
        const auto iter = tables.find(this->wlf_format_name);
11✔
1911
        if (iter == tables.end()) {
11✔
1912
            retval = std::make_shared<w3c_log_table>(this->shared_from_this());
11✔
1913
            tables[this->wlf_format_name] = retval;
11✔
1914
        }
1915

1916
        return retval;
11✔
1917
    }
11✔
1918

1919
    void get_subline(const log_format_file_state& lffs,
1,629✔
1920
                     const logline& ll,
1921
                     shared_buffer_ref& sbr,
1922
                     subline_options opts) override
1923
    {
1924
    }
1,629✔
1925

1926
    date_time_scanner wlf_time_scanner;
1927
    intern_string_t wlf_format_name;
1928
    std::vector<field_def> wlf_field_defs;
1929
};
1930

1931
std::unordered_map<const intern_string_t, logline_value_meta>
1932
    w3c_log_format::FIELD_META;
1933

1934
const intern_string_t w3c_log_format::F_DATE = intern_string::lookup("date");
1935
const intern_string_t w3c_log_format::F_TIME = intern_string::lookup("time");
1936

1937
struct logfmt_pair_handler {
1938
    explicit logfmt_pair_handler(date_time_scanner& dts) : lph_dt_scanner(dts)
12,530✔
1939
    {
1940
    }
12,530✔
1941

1942
    log_format::scan_result_t process_value(const string_fragment& value_frag)
4,300✔
1943
    {
1944
        if (this->lph_key_frag.is_one_of(
4,300✔
1945
                "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
1946
        {
1947
            if (!this->lph_dt_scanner.scan(value_frag.data(),
31✔
1948
                                           value_frag.length(),
31✔
1949
                                           nullptr,
1950
                                           &this->lph_time_tm,
1951
                                           this->lph_tv))
31✔
1952
            {
1953
                return log_format::scan_no_match{
×
1954
                    "timestamp value did not parse correctly"};
×
1955
            }
1956
            char buf[1024];
1957
            this->lph_dt_scanner.ftime(
31✔
1958
                buf, sizeof(buf), nullptr, this->lph_time_tm);
31✔
1959
            this->lph_found_time = true;
31✔
1960
        } else if (this->lph_key_frag.is_one_of("level"_frag, "lvl"_frag)) {
4,269✔
1961
            this->lph_level
1962
                = string2level(value_frag.data(), value_frag.length());
40✔
1963
        }
1964
        return log_format::scan_match{};
4,300✔
1965
    }
1966

1967
    date_time_scanner& lph_dt_scanner;
1968
    bool lph_found_time{false};
1969
    exttm lph_time_tm;
1970
    timeval lph_tv{0, 0};
1971
    log_level_t lph_level{log_level_t::LEVEL_INFO};
1972
    string_fragment lph_key_frag{""};
1973
};
1974

1975
class logfmt_format : public log_format {
1976
public:
1977
    /** @return The interned name of this format, "logfmt_log". */
    const intern_string_t get_name() const override
    {
        static const intern_string_t FORMAT_NAME
            = intern_string::lookup("logfmt_log");

        return FORMAT_NAME;
    }
1983

1984
    class logfmt_log_table : public log_format_vtab_impl {
1985
    public:
1986
        logfmt_log_table(std::shared_ptr<const log_format> format)
632✔
1987
            : log_format_vtab_impl(format)
632✔
1988
        {
1989
        }
632✔
1990

1991
        void get_columns(std::vector<vtab_column>& cols) const override
633✔
1992
        {
1993
            static const auto FIELDS = std::string("fields");
1,897✔
1994

1995
            cols.emplace_back(FIELDS);
633✔
1996
        }
633✔
1997
    };
1998

1999
    std::shared_ptr<log_vtab_impl> get_vtab_impl() const override
632✔
2000
    {
2001
        static auto retval
2002
            = std::make_shared<logfmt_log_table>(this->shared_from_this());
632✔
2003

2004
        return retval;
632✔
2005
    }
2006

2007
    /**
     * Scan a line as a sequence of logfmt key/value pairs.
     *
     * Each value is handed to a logfmt_pair_handler, which picks out the
     * timestamp and level.  Quoted values are first parsed as JSON so that
     * the handler sees the unescaped string; if that fails, the text between
     * the quotes is used as-is.
     *
     * @param lf The file being scanned; used for its options.
     * @param dst The lines indexed so far; a new entry is appended when a
     *   timestamp is found.
     * @param li Information about the line being scanned.
     * @param sbr The content of the line being scanned.
     * @param sbc Not used by this implementation.
     * @return scan_match{2000} if a timestamp was found, scan_no_match
     *   otherwise.
     */
    scan_result_t scan(logfile& lf,
                       std::vector<logline>& dst,
                       const line_info& li,
                       shared_buffer_ref& sbr,
                       scan_batch_context& sbc) override
    {
        auto p = logfmt::parser(sbr.to_string_fragment());
        scan_result_t retval = scan_no_match{};
        bool done = false;
        logfmt_pair_handler lph(this->lf_date_time);

        if (dst.empty()) {
            // First line of the file: pick up the default zone from the
            // file options, if there are any.
            auto file_options = lf.get_file_options();

            if (file_options) {
                this->lf_date_time.dts_default_zone
                    = file_options->second.fo_default_zone.pp_value;
            } else {
                this->lf_date_time.dts_default_zone = nullptr;
            }
        }

        while (!done) {
            auto parse_result = p.step();

            auto value_res = parse_result.match(
                [&done](const logfmt::parser::end_of_input&) -> scan_result_t {
                    done = true;
                    return scan_match{};
                },
                [&lph](const logfmt::parser::kvpair& kvp) -> scan_result_t {
                    lph.lph_key_frag = kvp.first;

                    return kvp.second.match(
                        [](const logfmt::parser::bool_value& bv)
                            -> scan_result_t { return scan_match{}; },
                        [&lph](const logfmt::parser::float_value& fv)
                            -> scan_result_t {
                            return lph.process_value(fv.fv_str_value);
                        },
                        [&lph](const logfmt::parser::int_value& iv)
                            -> scan_result_t {
                            return lph.process_value(iv.iv_str_value);
                        },
                        [&lph](const logfmt::parser::quoted_value& qv)
                            -> scan_result_t {
                            auto_mem<yajl_handle_t> handle(yajl_free);
                            yajl_callbacks cb{};

                            // Fill in the callback table before handing it
                            // to yajl_alloc().
                            cb.yajl_string = +[](void* ctx,
                                                 const unsigned char* str,
                                                 size_t len,
                                                 yajl_string_props_t*) -> int {
                                auto& handler
                                    = *static_cast<logfmt_pair_handler*>(ctx);
                                string_fragment value_frag{
                                    str, 0, static_cast<int>(len)};

                                auto value_res
                                    = handler.process_value(value_frag);
                                // A zero return tells yajl to stop parsing.
                                return value_res.is<scan_match>();
                            };
                            handle = yajl_alloc(&cb, nullptr, &lph);

                            if (yajl_parse(
                                    handle,
                                    reinterpret_cast<const unsigned char*>(
                                        qv.qv_value.data()),
                                    qv.qv_value.length())
                                    != yajl_status_ok
                                || yajl_complete_parse(handle)
                                    != yajl_status_ok)
                            {
                                log_debug("json parsing failed");
                                // Fall back to the raw text between the
                                // quotes.
                                string_fragment unq_frag{
                                    qv.qv_value.sf_string,
                                    qv.qv_value.sf_begin + 1,
                                    qv.qv_value.sf_end - 1,
                                };

                                return lph.process_value(unq_frag);
                            }

                            return scan_match{};
                        },
                        [&lph](const logfmt::parser::unquoted_value& uv)
                            -> scan_result_t {
                            return lph.process_value(uv.uv_value);
                        });
                },
                [](const logfmt::parser::error& err) -> scan_result_t {
                    // log_error("logfmt parse error: %s", err.e_msg.c_str());
                    return scan_no_match{};
                });
            if (value_res.is<scan_no_match>()) {
                retval = value_res;
                done = true;
            }
        }

        // A parsed timestamp makes the line a match, whatever the result of
        // the last step was.
        if (lph.lph_found_time) {
            this->lf_timestamp_flags = lph.lph_time_tm.et_flags;
            dst.emplace_back(
                li.li_file_range.fr_offset, lph.lph_tv, lph.lph_level);
            retval = scan_match{2000};
        }

        return retval;
    }
×
2114

2115
    void annotate(logfile* lf,
11✔
2116
                  uint64_t line_number,
2117
                  string_attrs_t& sa,
2118
                  logline_value_vector& values) const override
2119
    {
2120
        static const intern_string_t FIELDS_NAME
2121
            = intern_string::lookup("fields");
15✔
2122

2123
        auto& sbr = values.lvv_sbr;
11✔
2124
        auto p = logfmt::parser(sbr.to_string_fragment());
11✔
2125
        auto done = false;
11✔
2126
        auto found_body = false;
11✔
2127

2128
        while (!done) {
95✔
2129
            auto parse_result = p.step();
84✔
2130

2131
            done = parse_result.match(
168✔
2132
                [](const logfmt::parser::end_of_input&) { return true; },
11✔
2133
                [this, &sa, &values, &found_body](
×
2134
                    const logfmt::parser::kvpair& kvp) {
2135
                    auto value_frag = kvp.second.match(
73✔
2136
                        [this, &kvp, &values](
×
2137
                            const logfmt::parser::bool_value& bv) {
2138
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2139
                                                              kvp.first),
×
2140
                                                          value_kind_t::
2141
                                                              VALUE_INTEGER,
2142
                                                          logline_value_meta::
2143
                                                              table_column{0},
×
2144
                                                          (log_format*) this}
×
2145
                                           .with_struct_name(FIELDS_NAME);
×
2146
                            values.lvv_values.emplace_back(lvm, bv.bv_value);
×
2147

2148
                            return bv.bv_str_value;
×
2149
                        },
×
2150
                        [this, &kvp, &values](
×
2151
                            const logfmt::parser::int_value& iv) {
2152
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2153
                                                              kvp.first),
×
2154
                                                          value_kind_t::
2155
                                                              VALUE_INTEGER,
2156
                                                          logline_value_meta::
2157
                                                              table_column{0},
×
2158
                                                          (log_format*) this}
×
2159
                                           .with_struct_name(FIELDS_NAME);
×
2160
                            values.lvv_values.emplace_back(lvm, iv.iv_value);
×
2161

2162
                            return iv.iv_str_value;
×
2163
                        },
×
2164
                        [this, &kvp, &values](
73✔
2165
                            const logfmt::parser::float_value& fv) {
2166
                            auto lvm = logline_value_meta{intern_string::lookup(
×
2167
                                                              kvp.first),
×
2168
                                                          value_kind_t::
2169
                                                              VALUE_INTEGER,
2170
                                                          logline_value_meta::
2171
                                                              table_column{0},
×
2172
                                                          (log_format*) this}
×
2173
                                           .with_struct_name(FIELDS_NAME);
×
2174
                            values.lvv_values.emplace_back(lvm, fv.fv_value);
×
2175

2176
                            return fv.fv_str_value;
×
2177
                        },
×
2178
                        [](const logfmt::parser::quoted_value& qv) {
×
2179
                            return qv.qv_value;
24✔
2180
                        },
2181
                        [](const logfmt::parser::unquoted_value& uv) {
×
2182
                            return uv.uv_value;
49✔
2183
                        });
2184
                    auto value_lr
2185
                        = line_range{value_frag.sf_begin, value_frag.sf_end};
73✔
2186

2187
                    auto known_field = false;
73✔
2188
                    if (kvp.first.is_one_of(
73✔
2189
                            "timestamp"_frag, "time"_frag, "ts"_frag, "t"_frag))
2190
                    {
2191
                        sa.emplace_back(value_lr, L_TIMESTAMP.value());
11✔
2192
                        known_field = true;
11✔
2193
                    } else if (kvp.first.is_one_of("level"_frag, "lvl"_frag)) {
62✔
2194
                        sa.emplace_back(value_lr, L_LEVEL.value());
11✔
2195
                        known_field = true;
11✔
2196
                    } else if (kvp.first.is_one_of("msg"_frag, "message"_frag))
51✔
2197
                    {
2198
                        sa.emplace_back(value_lr, SA_BODY.value());
11✔
2199
                        found_body = true;
11✔
2200
                    } else if (kvp.second.is<logfmt::parser::quoted_value>()
40✔
2201
                               || kvp.second
78✔
2202
                                      .is<logfmt::parser::unquoted_value>())
38✔
2203
                    {
2204
                        auto lvm
2205
                            = logline_value_meta{intern_string::lookup(
160✔
2206
                                                     kvp.first),
40✔
2207
                                                 value_frag.startswith("\"")
40✔
2208
                                                     ? value_kind_t::VALUE_JSON
2209
                                                     : value_kind_t::VALUE_TEXT,
2210
                                                 logline_value_meta::
2211
                                                     table_column{0},
40✔
2212
                                                 (log_format*) this}
80✔
2213
                                  .with_struct_name(FIELDS_NAME);
40✔
2214
                        values.lvv_values.emplace_back(lvm, value_frag);
40✔
2215
                    }
40✔
2216
                    if (known_field) {
73✔
2217
                        auto key_with_eq = kvp.first;
22✔
2218
                        key_with_eq.sf_end += 1;
22✔
2219
                        sa.emplace_back(to_line_range(key_with_eq),
22✔
2220
                                        SA_REPLACED.value());
44✔
2221
                    } else {
2222
                        sa.emplace_back(to_line_range(kvp.first),
51✔
2223
                                        VC_ROLE.value(role_t::VCR_OBJECT_KEY));
102✔
2224
                    }
2225
                    return false;
73✔
2226
                },
2227
                [line_number, &sbr](const logfmt::parser::error& err) {
84✔
2228
                    log_error(
×
2229
                        "bad line %.*s", (int) sbr.length(), sbr.get_data());
2230
                    log_error("%lld:logfmt parse error: %s",
×
2231
                              line_number,
2232
                              err.e_msg.c_str());
2233
                    return true;
×
2234
                });
2235
        }
84✔
2236

2237
        if (!found_body) {
11✔
2238
            sa.emplace_back(line_range::empty_at(sbr.length()),
×
2239
                            SA_BODY.value());
×
2240
        }
2241

2242
        log_format::annotate(lf, line_number, sa, values);
11✔
2243
    }
11✔
2244

2245
    std::shared_ptr<log_format> specialized(int fmt_lock) override
5✔
2246
    {
2247
        auto retval = std::make_shared<logfmt_format>(*this);
5✔
2248

2249
        retval->lf_specialized = true;
5✔
2250
        return retval;
10✔
2251
    }
5✔
2252
};
2253

2254
static auto format_binder = injector::bind_multiple<log_format>()
2255
                                .add<logfmt_format>()
2256
                                .add<bro_log_format>()
2257
                                .add<w3c_log_format>()
2258
                                .add<generic_log_format>()
2259
                                .add<piper_log_format>();
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc