• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 20114321350-2741

10 Dec 2025 09:41PM UTC coverage: 68.836% (-0.07%) from 68.908%
20114321350-2741

push

github

tstack
[filters] add level filter

41 of 135 new or added lines in 10 files covered. (30.37%)

447 existing lines in 8 files now uncovered.

51534 of 74865 relevant lines covered (68.84%)

434761.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.11
/src/logfile.cc
1
/**
2
 * Copyright (c) 2007-2012, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file logfile.cc
30
 */
31

32
#include <chrono>
33
#include <filesystem>
34
#include <memory>
35
#include <optional>
36
#include <utility>
37
#include <vector>
38

39
#include "logfile.hh"
40

41
#include <errno.h>
42
#include <fcntl.h>
43
#include <string.h>
44
#include <sys/param.h>
45
#include <sys/resource.h>
46
#include <sys/stat.h>
47
#include <time.h>
48

49
#include "base/ansi_scrubber.hh"
50
#include "base/attr_line.builder.hh"
51
#include "base/auto_fd.hh"
52
#include "base/date_time_scanner.cfg.hh"
53
#include "base/fs_util.hh"
54
#include "base/injector.hh"
55
#include "base/intern_string.hh"
56
#include "base/is_utf8.hh"
57
#include "base/result.h"
58
#include "base/snippet_highlighters.hh"
59
#include "base/string_util.hh"
60
#include "base/time_util.hh"
61
#include "config.h"
62
#include "file_options.hh"
63
#include "hasher.hh"
64
#include "lnav_util.hh"
65
#include "log.watch.hh"
66
#include "log_format.hh"
67
#include "logfile.cfg.hh"
68
#include "piper.header.hh"
69
#include "shared_buffer.hh"
70
#include "text_format.hh"
71
#include "yajlpp/yajlpp_def.hh"
72

73
using namespace lnav::roles::literals;
74

75
static auto intern_lifetime = intern_string::get_table_lifetime();
76

77
static constexpr size_t INDEX_RESERVE_INCREMENT = 1024;
78

79
static constexpr size_t RETRY_MATCH_SIZE = 250;
80

81
/**
 * Accessor for the JSON property handlers used to serialize a gzip header.
 *
 * The container maps the "name", "mtime", and "comment" properties onto the
 * corresponding fields of lnav::gzip::header.  It is built once, on first
 * use, and shared by all callers.
 *
 * @return A reference to the function-local handler container.
 */
static const typed_json_path_container<lnav::gzip::header>&
get_file_header_handlers()
{
    using gzip_header = lnav::gzip::header;

    static const typed_json_path_container<gzip_header> GZIP_HEADER_HANDLERS
        = {
            yajlpp::property_handler("name").for_field(&gzip_header::h_name),
            yajlpp::property_handler("mtime").for_field(
                &gzip_header::h_mtime),
            yajlpp::property_handler("comment").for_field(
                &gzip_header::h_comment),
        };

    return GZIP_HEADER_HANDLERS;
}
94

95
Result<std::shared_ptr<logfile>, std::string>
96
logfile::open(std::filesystem::path filename,
705✔
97
              const logfile_open_options& loo,
98
              auto_fd fd)
99
{
100
    require(!filename.empty());
705✔
101

102
    auto lf = std::shared_ptr<logfile>(new logfile(std::move(filename), loo));
705✔
103

104
    memset(&lf->lf_stat, 0, sizeof(lf->lf_stat));
705✔
105
    std::filesystem::path resolved_path;
705✔
106

107
    if (!fd.has_value()) {
705✔
108
        auto rp_res = lnav::filesystem::realpath(lf->lf_filename);
695✔
109
        if (rp_res.isErr()) {
695✔
110
            return Err(fmt::format(FMT_STRING("realpath({}) failed with: {}"),
×
111
                                   lf->lf_filename,
×
112
                                   rp_res.unwrapErr()));
×
113
        }
114

115
        resolved_path = rp_res.unwrap();
695✔
116
        if (lnav::filesystem::statp(resolved_path, &lf->lf_stat) == -1) {
695✔
117
            return Err(fmt::format(FMT_STRING("stat({}) failed with: {}"),
×
118
                                   lf->lf_filename,
×
119
                                   lnav::from_errno()));
×
120
        }
121

122
        if (!S_ISREG(lf->lf_stat.st_mode)) {
695✔
123
            return Err(fmt::format(FMT_STRING("{} is not a regular file"),
×
124
                                   lf->lf_filename));
×
125
        }
126
    }
695✔
127

128
    auto_fd lf_fd;
705✔
129
    if (fd.has_value()) {
705✔
130
        lf_fd = std::move(fd);
10✔
131
    } else if ((lf_fd
695✔
132
                = lnav::filesystem::openp(resolved_path, O_RDONLY | O_CLOEXEC))
695✔
133
               == -1)
695✔
134
    {
135
        return Err(fmt::format(FMT_STRING("open({}) failed with: {}"),
×
136
                               lf->lf_filename,
×
137
                               lnav::from_errno()));
×
138
    } else {
139
        lf->lf_actual_path = lf->lf_filename;
695✔
140
        lf->lf_valid_filename = true;
695✔
141
    }
142

143
    lf_fd.close_on_exec();
705✔
144

145
    log_info("Creating logfile: fd=%d; size=%" PRId64 "; mtime=%" PRId64
705✔
146
             "; filename=%s",
147
             (int) lf_fd,
148
             (long long) lf->lf_stat.st_size,
149
             (long long) lf->lf_stat.st_mtime,
150
             lf->lf_filename_as_string.c_str());
151
    if (lf->lf_actual_path) {
705✔
152
        log_info("  actual_path=%s", lf->lf_actual_path->c_str());
695✔
153
    }
154

155
    if (!lf->lf_options.loo_filename.empty()) {
705✔
156
        lf->set_filename(lf->lf_options.loo_filename);
73✔
157
        lf->lf_valid_filename = false;
73✔
158
    }
159

160
    lf->lf_line_buffer.set_fd(lf_fd);
705✔
161
    lf->lf_index.reserve(INDEX_RESERVE_INCREMENT);
705✔
162

163
    lf->lf_indexing = lf->lf_options.loo_is_visible;
705✔
164
    lf->lf_text_format
705✔
165
        = lf->lf_options.loo_text_format.value_or(text_format_t::TF_UNKNOWN);
705✔
166
    lf->lf_format_match_messages = loo.loo_match_details;
705✔
167

168
    const auto& hdr = lf->lf_line_buffer.get_header_data();
705✔
169
    if (hdr.valid()) {
705✔
170
        log_info("%s: has header %d",
64✔
171
                 lf->lf_filename_as_string.c_str(),
172
                 hdr.valid());
173
        hdr.match(
64✔
174
            [&lf](const lnav::gzip::header& gzhdr) {
×
175
                if (!gzhdr.empty()) {
4✔
176
                    lf->lf_embedded_metadata["net.zlib.gzip.header"] = {
12✔
177
                        text_format_t::TF_JSON,
178
                        get_file_header_handlers()
8✔
179
                            .formatter_for(gzhdr)
8✔
180
                            .with_config(yajl_gen_beautify, 1)
8✔
181
                            .to_string(),
182
                    };
12✔
183
                }
184
            },
12✔
185
            [&lf](const lnav::piper::header& phdr) {
64✔
186
                static auto& safe_options_hier
187
                    = injector::get<lnav::safe_file_options_hier&>();
60✔
188

189
                lf->lf_embedded_metadata["org.lnav.piper.header"] = {
180✔
190
                    text_format_t::TF_JSON,
191
                    lnav::piper::header_handlers.formatter_for(phdr)
60✔
192
                        .with_config(yajl_gen_beautify, 1)
120✔
193
                        .to_string(),
194
                };
120✔
195
                log_info("setting file name from piper header: %s",
60✔
196
                         phdr.h_name.c_str());
197
                lf->set_filename(phdr.h_name);
60✔
198
                lf->lf_valid_filename = false;
60✔
199
                if (phdr.h_demux_output
60✔
200
                    == lnav::piper::demux_output_t::signal) {
201
                    lf->lf_text_format = text_format_t::TF_LOG;
14✔
202
                }
203

204
                lnav::file_options fo;
60✔
205
                if (!phdr.h_timezone.empty()) {
60✔
206
                    log_info("setting default time zone from piper header: %s",
25✔
207
                             phdr.h_timezone.c_str());
208
                    try {
209
                        fo.fo_default_zone.pp_value
210
                            = date::locate_zone(phdr.h_timezone);
25✔
211
                    } catch (const std::runtime_error& e) {
×
212
                        log_error("unable to get tz from piper header %s -- %s",
×
213
                                  phdr.h_timezone.c_str(),
214
                                  e.what());
215
                    }
×
216
                }
217
                if (!fo.empty()) {
60✔
218
                    safe::WriteAccess<lnav::safe_file_options_hier>
219
                        options_hier(safe_options_hier);
25✔
220

221
                    auto& coll = options_hier->foh_path_to_collection["/"];
25✔
222
                    auto iter
223
                        = coll.foc_pattern_to_options.find(lf->get_filename());
25✔
224
                    if (iter == coll.foc_pattern_to_options.end()
25✔
225
                        || !(iter->second == fo))
25✔
226
                    {
227
                        coll.foc_pattern_to_options[lf->get_filename()] = fo;
25✔
228
                        options_hier->foh_generation += 1;
25✔
229
                    }
230
                }
25✔
231
            });
180✔
232
    }
233

234
    lf->file_options_have_changed();
705✔
235
    lf->lf_content_id = hasher().update(lf->lf_filename_as_string).to_string();
705✔
236

237
    lf->lf_line_buffer.set_do_preloading(true);
705✔
238
    lf->lf_line_buffer.send_initial_load();
705✔
239

240
    ensure(lf->invariant());
705✔
241

242
    return Ok(lf);
705✔
243
}
705✔
244

245
/**
 * Construct a logfile for the given path and open options.
 *
 * The string form and base name are both derived from lf_filename, so the
 * initializers below rely on lf_filename being initialized first.
 * NOTE(review): that depends on the member declaration order in the class,
 * which is not visible here -- confirm before reordering members.
 *
 * @param filename The path of the file.
 * @param loo The options the file is being opened with.
 */
logfile::logfile(std::filesystem::path filename,
                 const logfile_open_options& loo)
    : lf_filename(std::move(filename)),
      lf_filename_as_string(lf_filename.string()),
      lf_options(loo),
      lf_basename(lf_filename.filename())
{
    // Initial capacity for the opid and thread-id range tables.
    static constexpr size_t ID_RANGE_RESERVE = 64;

    this->lf_line_buffer.set_decompress_extra(true);

    {
        auto opids = this->lf_opids.writeAccess();

        opids->los_opid_ranges.reserve(ID_RANGE_RESERVE);
    }
    {
        auto tids = this->lf_thread_ids.writeAccess();

        tids->ltis_tid_ranges.reserve(ID_RANGE_RESERVE);
    }
}
255

256
/**
 * Log the name of the file as the object is torn down.
 */
logfile::~logfile()
{
    const char* doomed_name = this->lf_filename_as_string.c_str();

    log_info("destructing logfile: %s", doomed_name);
}
260

261
bool
262
logfile::file_options_have_changed()
4,979✔
263
{
264
    static auto& safe_options_hier
265
        = injector::get<lnav::safe_file_options_hier&>();
4,979✔
266

267
    bool tz_changed = false;
4,979✔
268

269
    {
270
        safe::ReadAccess<lnav::safe_file_options_hier> options_hier(
271
            safe_options_hier);
4,979✔
272

273
        if (this->lf_file_options_generation == options_hier->foh_generation) {
4,979✔
274
            return false;
4,343✔
275
        }
276
        log_info("%s: checking new generation of file options: %zu -> %zu",
636✔
277
                 this->lf_filename_as_string.c_str(),
278
                 this->lf_file_options_generation,
279
                 options_hier->foh_generation);
280
        auto new_options = options_hier->match(this->get_filename());
636✔
281
        if (this->lf_file_options == new_options) {
636✔
282
            this->lf_file_options_generation = options_hier->foh_generation;
604✔
283
            return false;
604✔
284
        }
285

286
        this->lf_file_options = new_options;
32✔
287
        log_info("%s: file options have changed",
32✔
288
                 this->lf_filename_as_string.c_str());
289
        if (this->lf_file_options) {
32✔
290
            log_info(
32✔
291
                "  tz=%s",
292
                this->lf_file_options->second.fo_default_zone.pp_value->name()
293
                    .c_str());
294
            if (this->lf_file_options->second.fo_default_zone.pp_value
32✔
295
                    != nullptr
296
                && this->lf_format != nullptr
32✔
297
                && !(this->lf_format->lf_timestamp_flags & ETF_ZONE_SET))
64✔
298
            {
299
                log_info("  tz change affects this file");
3✔
300
                tz_changed = true;
3✔
301
            }
302
        } else if (this->lf_format != nullptr
×
303
                   && !(this->lf_format->lf_timestamp_flags & ETF_ZONE_SET)
×
UNCOV
304
                   && this->lf_format->lf_date_time.dts_default_zone != nullptr)
×
305
        {
UNCOV
306
            tz_changed = true;
×
307
        }
308
        this->lf_file_options_generation = options_hier->foh_generation;
32✔
309
    }
5,583✔
310

311
    return tz_changed;
32✔
312
}
313

314
logfile::map_entry_result
315
logfile::find_content_map_entry(file_off_t offset, map_read_requirement req)
3✔
316
{
317
    static constexpr auto LOOKBACK_SIZE = 32 * 1024;
318
    static constexpr auto MAX_LOOKBACK_SIZE = 4 * 1024 * 1024;
319

320
    auto lookback_size = this->lf_line_buffer.is_compressed()
3✔
321
        ? LOOKBACK_SIZE * 4
3✔
322
        : LOOKBACK_SIZE;
3✔
323

324
    if (offset < lookback_size) {
3✔
325
        return map_entry_not_found{};
1✔
326
    }
327
    auto end_range = file_range{
2✔
328
        offset - lookback_size,
2✔
329
        lookback_size,
2✔
330
    };
2✔
331

332
    auto full_size = this->get_content_size();
2✔
333
    file_size_t lower_offset = 0;
2✔
334
    file_size_t upper_offset = full_size;
2✔
335
    auto looping = true;
2✔
336
    std::optional<content_map_entry> best_lower_bound;
2✔
337
    do {
1✔
338
        std::optional<content_map_entry> lower_retval;
3✔
339
        std::optional<content_map_entry> time_found;
3✔
340
        log_debug(
3✔
341
            "    peeking range (off=%lld; size=%lld;  lower=%lld; upper=%lld)",
342
            end_range.fr_offset,
343
            end_range.fr_size,
344
            lower_offset,
345
            upper_offset);
346
        auto peek_res = this->lf_line_buffer.peek_range(end_range);
3✔
347
        if (!peek_res.isOk()) {
3✔
348
            log_error("    peek failed -- %s", peek_res.unwrapErr().c_str());
×
UNCOV
349
            return map_entry_not_found{};
×
350
        }
351
        auto peek_buf = peek_res.unwrap();
3✔
352
        auto peek_sf = to_string_fragment(peek_buf);
3✔
353

354
        if (req.is<map_read_upper_bound>()) {
3✔
355
            if (!peek_sf.endswith("\n")) {
1✔
356
                log_warning("    peek returned partial line");
×
357
                this->lf_file_size_at_map_time = full_size;
×
UNCOV
358
                return map_entry_not_found{};
×
359
            }
360
            peek_sf.pop_back();
1✔
361
        }
362
        auto found_line = false;
3✔
363
        while (!peek_sf.empty()) {
95✔
364
            auto rsplit_res = peek_sf.rsplit_pair(string_fragment::tag1{'\n'});
95✔
365
            if (!rsplit_res) {
95✔
366
                log_trace("    did not peek enough to find last line (off=%d)",
1✔
367
                          peek_sf.sf_end);
368
                if (!found_line && req.is<map_read_upper_bound>()) {
1✔
369
                    if (end_range.fr_offset < lookback_size) {
×
UNCOV
370
                        return map_entry_not_found{};
×
371
                    }
372
                    end_range.fr_offset -= lookback_size;
×
373
                    end_range.fr_size += lookback_size;
×
374
                    if (end_range.next_offset() > full_size) {
×
375
                        end_range.fr_offset = 0;
×
376
                        end_range.fr_size = full_size;
×
377
                    } else if (end_range.fr_size > MAX_LOOKBACK_SIZE) {
×
UNCOV
378
                        return map_entry_not_found{};
×
379
                    }
380
                }
381
                break;
1✔
382
            }
383

384
            found_line = true;
94✔
385
            auto [leading, last_line] = rsplit_res.value();
94✔
386
            // log_debug("leading %d", leading.length());
387
            // log_debug("last %.*s", last_line.length(), last_line.data());
388
            pattern_locks line_locks;
94✔
389
            scan_batch_context sbc_tmp{
94✔
390
                this->lf_allocator,
94✔
391
                line_locks,
392
            };
94✔
393
            shared_buffer tmp_sb;
94✔
394
            shared_buffer_ref tmp_sbr;
94✔
395
            tmp_sbr.share(tmp_sb, last_line.data(), last_line.length());
94✔
396
            auto end_lines_fr = file_range{
94✔
397
                end_range.fr_offset + last_line.sf_begin,
94✔
398
                last_line.length(),
188✔
399
            };
94✔
400
            auto utf8_res = is_utf8(last_line, '\n');
94✔
401
            end_lines_fr.fr_metadata.m_has_ansi = utf8_res.usr_has_ansi;
94✔
402
            end_lines_fr.fr_metadata.m_valid_utf = utf8_res.is_valid();
94✔
403
            auto end_li = line_info{
94✔
404
                end_lines_fr,
405
            };
94✔
406
            end_li.li_utf8_scan_result = utf8_res;
94✔
407
            std::vector<logline> tmp_index;
94✔
408
            auto scan_res = this->lf_format->scan(
94✔
409
                *this, tmp_index, end_li, tmp_sbr, sbc_tmp);
94✔
410
            if (scan_res.is<log_format::scan_match>() && !tmp_index.empty()) {
94✔
411
                auto line_time
412
                    = tmp_index.back().get_time<std::chrono::microseconds>();
92✔
413

414
                if (req.is<map_read_lower_bound>()) {
92✔
415
                    auto lb = req.get<map_read_lower_bound>();
91✔
416
                    if (line_time >= lb.mrlb_time) {
91✔
417
                        log_debug("  got lower retval! %s",
90✔
418
                                  lnav::to_rfc3339_string(line_time).c_str());
419
                        lower_retval = content_map_entry{
180✔
420
                            end_lines_fr,
421
                            line_time,
422
                        };
90✔
423
                        if (!best_lower_bound
90✔
424
                            || line_time < best_lower_bound->cme_time)
90✔
425
                        {
426
                            best_lower_bound = lower_retval;
55✔
427
                        }
428
                    } else if (lower_retval) {
1✔
429
                        return map_entry_found{lower_retval.value()};
1✔
430
                    } else {
431
                        // need to move forward
UNCOV
432
                        time_found = content_map_entry{
×
433
                            end_lines_fr,
434
                            line_time,
435
                        };
436
                        peek_sf = string_fragment{};
×
UNCOV
437
                        continue;
×
438
                    }
439
                } else {
440
                    return map_entry_found{content_map_entry{
1✔
441
                        end_lines_fr,
442
                        line_time,
443
                    }};
1✔
444
                }
445
            }
446
            // log_trace("%s: no match for line, going back",
447
            // this->lf_filename_as_string.c_str());
448
            peek_sf = leading;
92✔
449
        }
104✔
450

451
        log_trace("    no messages found in peek, going back further");
1✔
452
        if (time_found && best_lower_bound
1✔
453
            && end_range.next_offset() >= upper_offset)
1✔
454
        {
455
            log_info("    lower bound lies in upper half");
×
UNCOV
456
            return map_entry_found{best_lower_bound.value()};
×
457
        }
458
        req.match(
1✔
459
            [&](map_read_upper_bound& m) {
×
460
                if (end_range.fr_offset < end_range.fr_size
×
UNCOV
461
                    || (full_size - end_range.fr_offset) >= MAX_LOOKBACK_SIZE)
×
462
                {
UNCOV
463
                    looping = false;
×
464
                } else {
465
                    // look further back
466
                    end_range.fr_offset = end_range.fr_offset + peek_sf.sf_end
×
UNCOV
467
                        + 1 - end_range.fr_size;
×
468
                }
UNCOV
469
            },
×
470
            [&](map_read_lower_bound& m) {
1✔
471
                if (lower_retval) {
1✔
472
                    upper_offset = lower_retval.value().cme_range.fr_offset;
1✔
473
                    log_debug("    first half %lld %s",
1✔
474
                              (upper_offset - lower_offset) / 2,
475
                              lnav::to_rfc3339_string(lower_retval->cme_time)
476
                                  .c_str());
477
                    auto amount = (upper_offset - lower_offset) / 2;
1✔
478
                    end_range.fr_offset = lower_offset + amount;
1✔
479
                    if (end_range.next_offset() > upper_offset) {
1✔
480
                        log_debug("    adjusting end offset");
1✔
481
                        if (end_range.fr_size < upper_offset) {
1✔
482
                            end_range.fr_offset
×
UNCOV
483
                                = upper_offset - end_range.fr_size;
×
484
                        } else {
485
                            end_range.fr_offset = 0;
1✔
486
                            end_range.fr_size = upper_offset;
1✔
487
                        }
488
                    }
489
                } else if (time_found) {
×
UNCOV
490
                    log_debug(
×
491
                        "    second half (%lld %lld) %s",
492
                        end_range.fr_offset,
493
                        upper_offset,
494
                        lnav::to_rfc3339_string(time_found.value().cme_time)
495
                            .c_str());
496
                    lower_offset = time_found->cme_range.next_offset();
×
497
                    end_range.fr_offset
×
498
                        = lower_offset + (upper_offset - lower_offset) / 2;
×
499
                } else if (end_range.next_offset() <= full_size) {
×
UNCOV
500
                    log_debug("    no time found (%lld %lld)",
×
501
                              end_range.fr_offset,
502
                              upper_offset);
503
                    if (end_range.next_offset() == upper_offset) {
×
UNCOV
504
                        upper_offset = end_range.fr_offset;
×
505
                    }
UNCOV
506
                    end_range.fr_offset = upper_offset - end_range.fr_size;
×
507
                } else {
UNCOV
508
                    looping = false;
×
509
                }
510
                if (end_range.next_offset() > full_size) {
1✔
UNCOV
511
                    end_range.fr_offset = full_size - end_range.fr_size;
×
512
                }
513
            });
1✔
514
    } while (looping);
5✔
515

UNCOV
516
    return map_entry_not_found{};
×
517
}
518

519
logfile::rebuild_result_t
520
logfile::build_content_map()
3✔
521
{
522
    static auto op = lnav_operation{"build_content_map"};
3✔
523

524
    auto op_guard = lnav_opid_guard::internal(op);
3✔
525

526
    log_info("%s: trying to build content map",
3✔
527
             this->lf_filename_as_string.c_str());
528
    if (this->lf_line_buffer.is_compressed()) {
3✔
529
        auto skip_size = file_off_t{512 * 1024};
×
530
        auto read_size = file_ssize_t{64 * 1024};
×
531
        pattern_locks line_locks;
×
532
        scan_batch_context sbc_tmp{
×
UNCOV
533
            this->lf_allocator,
×
534
            line_locks,
535
        };
536

UNCOV
537
        auto peek_range = file_range{
×
538
            0,
539
            read_size,
540
        };
UNCOV
541
        log_info("  file is compressed, doing scan");
×
542
        while (true) {
543
            auto last_peek = peek_range;
×
544
            peek_range.fr_offset += skip_size;
×
UNCOV
545
            log_debug("    content map peek %lld:%lld",
×
546
                      peek_range.fr_offset,
547
                      peek_range.fr_size);
548
            auto peek_res = this->lf_line_buffer.peek_range(
549
                peek_range,
550
                {
551
                    line_buffer::peek_options::allow_short_read,
552
                });
×
553
            if (peek_res.isErr()) {
×
UNCOV
554
                log_error("    content map peek failed -- %s",
×
555
                          peek_res.unwrapErr().c_str());
UNCOV
556
                break;
×
557
            }
558

559
            auto buf = peek_res.unwrap();
×
560
            if (buf.empty()) {
×
561
                if (this->lf_line_buffer.get_file_size() == -1) {
×
562
                    log_info("    skipped past end, reversing");
×
563
                    skip_size = peek_range.fr_size;
×
564
                    peek_range = last_peek;
×
UNCOV
565
                    continue;
×
566
                }
UNCOV
567
                log_info("    reached end of file %lld",
×
568
                         this->lf_line_buffer.get_file_size());
UNCOV
569
                break;
×
570
            }
571
            auto buf_sf = to_string_fragment(buf);
×
572
            auto split_res = buf_sf.split_pair(string_fragment::tag1{'\n'});
×
573
            if (!split_res) {
×
UNCOV
574
                log_warning("  cannot find start of line at %lld",
×
575
                            peek_range.fr_offset);
UNCOV
576
                continue;
×
577
            }
578

579
            auto [_junk, line_start_sf] = split_res.value();
×
580
            while (!line_start_sf.empty()) {
×
581
                auto utf8_res = is_utf8(line_start_sf, '\n');
×
582
                if (!utf8_res.usr_remaining) {
×
UNCOV
583
                    log_warning("    cannot find end of line at %lld",
×
584
                                peek_range.fr_offset + line_start_sf.sf_begin);
UNCOV
585
                    break;
×
586
                }
587
                auto line_len = utf8_res.remaining_ptr() - line_start_sf.data();
×
588
                shared_buffer tmp_sb;
×
UNCOV
589
                shared_buffer_ref tmp_sbr;
×
590

UNCOV
591
                tmp_sbr.share(tmp_sb, line_start_sf.data(), line_len);
×
592

593
                auto map_line_fr = file_range{
×
UNCOV
594
                    peek_range.fr_offset + line_start_sf.sf_begin,
×
595
                    line_len,
596
                };
597
                map_line_fr.fr_metadata.m_has_ansi = utf8_res.usr_has_ansi;
×
598
                map_line_fr.fr_metadata.m_valid_utf = utf8_res.is_valid();
×
599
                auto map_li = line_info{map_line_fr};
×
600
                map_li.li_utf8_scan_result = utf8_res;
×
601
                std::vector<logline> tmp_index;
×
602
                auto scan_res = this->lf_format->scan(
×
603
                    *this, tmp_index, map_li, tmp_sbr, sbc_tmp);
×
604
                if (scan_res.is<log_format::scan_match>()) {
×
605
                    auto line_time = tmp_index.front()
×
606
                                         .get_time<std::chrono::microseconds>();
×
UNCOV
607
                    this->lf_content_map.emplace_back(content_map_entry{
×
608
                        map_line_fr,
609
                        line_time,
610
                    });
UNCOV
611
                    log_info("  adding content map entry %lld - %s",
×
612
                             map_line_fr.fr_offset,
613
                             lnav::to_rfc3339_string(line_time).c_str());
614
                    if (skip_size < 1024 * 1024 * 1024) {
×
UNCOV
615
                        skip_size *= 2;
×
616
                    }
UNCOV
617
                    break;
×
618
                }
UNCOV
619
                line_start_sf = utf8_res.usr_remaining.value();
×
620
            }
621
        }
622
    }
623

624
    auto retval = rebuild_result_t::NO_NEW_LINES;
3✔
625
    auto full_size = this->get_content_size();
3✔
626

627
    this->lf_lower_bound_entry = std::nullopt;
3✔
628
    this->lf_upper_bound_entry = std::nullopt;
3✔
629

630
    log_info("  finding content layout (full_size=%lld)", full_size);
3✔
631
    if (this->lf_options.loo_time_range.has_lower_bound()
3✔
632
        && this->lf_options.loo_time_range.tr_begin
3✔
633
            > this->lf_index.front().get_time<std::chrono::microseconds>()
3✔
634
        && this->lf_options.loo_time_range.tr_begin
6✔
635
            <= this->lf_index.back().get_time<std::chrono::microseconds>())
5✔
636
    {
637
        auto ll_opt = this->find_from_time(
2✔
638
            to_timeval(this->lf_options.loo_time_range.tr_begin));
2✔
639
        auto ll = ll_opt.value();
2✔
640
        auto first_line_offset = ll->get_offset();
2✔
UNCOV
641
        this->lf_lower_bound_entry = content_map_entry{
×
642
            file_range{first_line_offset, full_size - first_line_offset},
2✔
643
            ll->get_time<std::chrono::microseconds>(),
2✔
644
        };
2✔
645
        log_info("  lower bound is within current index, erasing %ld lines",
4✔
646
                 std::distance(this->lf_index.cbegin(), ll));
647
        this->lf_index_size = first_line_offset;
2✔
648
        this->lf_index.clear();
2✔
649
        retval = rebuild_result_t::NEW_ORDER;
2✔
650
    }
651

652
    if (this->lf_index_size == full_size) {
3✔
653
        log_trace("  file has already been scanned, no need to peek");
1✔
654
        const auto& last_line = this->lf_index.back();
1✔
655
        auto last_line_offset = last_line.get_offset();
1✔
UNCOV
656
        this->lf_upper_bound_entry = content_map_entry{
×
657
            file_range{last_line_offset, full_size - last_line_offset},
1✔
658
            last_line.get_time<std::chrono::microseconds>(),
1✔
659
        };
1✔
660
        if (this->lf_options.loo_time_range.has_lower_bound()
1✔
661
            && this->lf_options.loo_time_range.tr_begin
2✔
662
                > this->lf_index.back().get_time<std::chrono::microseconds>())
2✔
663
        {
664
            log_info("  lower bound is past content");
×
665
            this->lf_index.clear();
×
UNCOV
666
            retval = rebuild_result_t::NEW_ORDER;
×
667
        }
668
        this->lf_file_size_at_map_time = full_size;
1✔
669
        return retval;
1✔
670
    }
671

672
    auto end_entry_opt
673
        = this->find_content_map_entry(full_size, map_read_upper_bound{});
2✔
674
    if (!end_entry_opt.is<map_entry_found>()) {
2✔
675
        log_warning(
1✔
676
            "  skipping content map since the last message could not be "
677
            "found");
678
        return retval;
1✔
679
    }
680

681
    auto end_entry = end_entry_opt.get<map_entry_found>().mef_entry;
1✔
682
    log_info("  found content end: %llu %s",
1✔
683
             end_entry.cme_range.fr_offset,
684
             lnav::to_rfc3339_string(to_timeval(end_entry.cme_time)).c_str());
685
    this->lf_upper_bound_entry = end_entry;
1✔
686
    this->lf_file_size_at_map_time = full_size;
1✔
687

688
    if (this->lf_options.loo_time_range.has_lower_bound()) {
1✔
689
        if (this->lf_options.loo_time_range.tr_begin > end_entry.cme_time) {
1✔
UNCOV
690
            retval = rebuild_result_t::NEW_ORDER;
×
691
        } else if (this->lf_index.empty()
1✔
692
                   || this->lf_options.loo_time_range.tr_begin
1✔
UNCOV
693
                       > this->lf_index.back()
×
694
                             .get_time<std::chrono::microseconds>())
1✔
695
        {
696
            auto offset = full_size / 2;
1✔
697
            log_debug("  searching for lower bound %lld",
1✔
698
                      this->lf_options.loo_time_range.tr_begin.count());
699
            auto low_entry_opt = this->find_content_map_entry(
700
                offset,
UNCOV
701
                map_read_lower_bound{
×
702
                    this->lf_options.loo_time_range.tr_begin,
703
                });
1✔
704
            if (low_entry_opt.is<map_entry_found>()) {
1✔
705
                auto low_entry = low_entry_opt.get<map_entry_found>().mef_entry;
1✔
706
                log_info("  found content start: %llu %s",
1✔
707
                         low_entry.cme_range.fr_offset,
708
                         lnav::to_rfc3339_string(to_timeval(low_entry.cme_time))
709
                             .c_str());
710
                this->lf_lower_bound_entry = low_entry;
1✔
711
                this->lf_index_size = low_entry.cme_range.fr_offset;
1✔
712

713
                retval = rebuild_result_t::NEW_ORDER;
1✔
714
            }
715
        }
1✔
716
    }
717

718
    if (retval == rebuild_result_t::NEW_ORDER) {
1✔
719
        {
720
            auto los = this->lf_opids.writeAccess();
1✔
721

722
            los->los_opid_ranges.clear();
1✔
723
            los->los_sub_in_use.clear();
1✔
724
        }
1✔
725
        {
726
            auto tids = this->lf_thread_ids.writeAccess();
1✔
727
            tids->ltis_tid_ranges.clear();
1✔
728
        }
1✔
729
        this->lf_pattern_locks.pl_lines.clear();
1✔
730
        this->lf_value_stats.clear();
1✔
731
        this->lf_index.clear();
1✔
732
        this->lf_upper_bound_size = std::nullopt;
1✔
733
    }
734

735
    return retval;
1✔
736
}
3✔
737

738
bool
739
logfile::in_range() const
4,154✔
740
{
741
    if (this->lf_format == nullptr) {
4,154✔
742
        return true;
646✔
743
    }
744

745
    return !this->lf_index.empty() || this->lf_lower_bound_entry.has_value();
3,508✔
746
}
747

748
bool
749
logfile::exists() const
4,154✔
750
{
751
    if (!this->lf_actual_path) {
4,154✔
752
        return true;
67✔
753
    }
754

755
    if (this->lf_options.loo_source == logfile_name_source::ARCHIVE) {
4,087✔
UNCOV
756
        return true;
×
757
    }
758

759
    auto stat_res = lnav::filesystem::stat_file(this->lf_actual_path.value());
4,087✔
760
    if (stat_res.isErr()) {
4,087✔
UNCOV
761
        log_error("%s: stat failed -- %s",
×
762
                  this->lf_actual_path.value().c_str(),
763
                  stat_res.unwrapErr().c_str());
UNCOV
764
        return false;
×
765
    }
766

767
    auto st = stat_res.unwrap();
4,087✔
768
    return this->lf_stat.st_dev == st.st_dev
4,087✔
769
        && this->lf_stat.st_ino == st.st_ino;
4,087✔
770
}
4,087✔
771

772
auto
773
logfile::reset_state() -> void
8✔
774
{
775
    this->clear_time_offset();
8✔
776
    this->lf_indexing = this->lf_options.loo_is_visible;
8✔
777
}
8✔
778

779
void
780
logfile::set_format_base_time(log_format* lf, const line_info& li)
909,815✔
781
{
782
    time_t file_time = li.li_timestamp.tv_sec != 0
909,815✔
783
        ? li.li_timestamp.tv_sec
909,815✔
784
        : this->lf_line_buffer.get_file_time();
884,933✔
785

786
    if (file_time == 0) {
909,815✔
787
        file_time = this->lf_stat.st_mtime;
883,105✔
788
    }
789

790
    if (!this->lf_cached_base_time
909,815✔
791
        || this->lf_cached_base_time.value() != file_time)
909,815✔
792
    {
793
        tm new_base_tm;
794
        this->lf_cached_base_time = file_time;
674✔
795
        localtime_r(&file_time, &new_base_tm);
674✔
796
        this->lf_cached_base_tm = new_base_tm;
674✔
797
    }
798
    lf->lf_date_time.set_base_time(this->lf_cached_base_time.value(),
909,815✔
799
                                   this->lf_cached_base_tm.value());
909,815✔
800
}
909,815✔
801

802
/**
 * Compute the time span covered by this file.
 *
 * @return The times of the first and last indexed lines when a format has
 * been detected and the index is non-empty.  Otherwise, the range falls
 * back to the ctime/mtime from the cached stat.
 */
time_range
logfile::get_content_time_range() const
{
    const bool have_indexed_lines
        = this->lf_format != nullptr && !this->lf_index.empty();

    if (have_indexed_lines) {
        const auto& first_line = this->lf_index.front();
        const auto& final_line = this->lf_index.back();

        return {
            first_line.get_time<std::chrono::microseconds>(),
            final_line.get_time<std::chrono::microseconds>(),
        };
    }

    // No parsed content, use the file system times instead.
    return {
        std::chrono::seconds{this->lf_stat.st_ctime},
        std::chrono::seconds{this->lf_stat.st_mtime},
    };
}
817

818
bool
819
logfile::process_prefix(shared_buffer_ref& sbr,
18,898✔
820
                        const line_info& li,
821
                        scan_batch_context& sbc)
822
{
823
    static auto max_unrecognized_lines
824
        = injector::get<const lnav::logfile::config&>()
1,186✔
825
              .lc_max_unrecognized_lines;
18,898✔
826

827
    log_format::scan_result_t found = log_format::scan_no_match{};
18,898✔
828
    size_t prescan_size = this->lf_index.size();
18,898✔
829
    auto prescan_time = std::chrono::microseconds{0};
18,898✔
830
    bool retval = false;
18,898✔
831

832
    if (this->lf_options.loo_detect_format
37,796✔
833
        && (this->lf_format == nullptr
32,962✔
834
            || this->lf_index.size() < RETRY_MATCH_SIZE))
14,064✔
835
    {
836
        const auto& root_formats = log_format::get_root_formats();
12,541✔
837
        std::optional<std::pair<log_format*, log_format::scan_match>>
838
            best_match;
12,541✔
839
        size_t scan_count = 0;
12,541✔
840

841
        if (!this->lf_index.empty()) {
12,541✔
842
            prescan_time = this->lf_index[prescan_size - 1]
11,424✔
843
                               .get_time<std::chrono::microseconds>();
11,424✔
844
        }
845
        if (this->lf_format != nullptr) {
12,541✔
UNCOV
846
            best_match = std::make_pair(
×
847
                this->lf_format.get(),
10,479✔
848
                log_format::scan_match{this->lf_format_quality});
20,958✔
849
        }
850

851
        /*
852
         * Try each scanner until we get a match.  Fortunately, the formats
853
         * tend to be sufficiently different that there are few ambiguities...
854
         */
855
        log_trace("logfile[%s]: scanning line %zu (offset: %lld; size: %lld)",
12,541✔
856
                  this->lf_filename_as_string.c_str(),
857
                  this->lf_index.size(),
858
                  li.li_file_range.fr_offset,
859
                  li.li_file_range.fr_size);
860
        auto starting_index_size = this->lf_index.size();
12,541✔
861
        size_t prev_index_size = this->lf_index.size();
12,541✔
862
        pattern_locks line_locks;
12,541✔
863
        scan_batch_context sbc_tmp{
12,541✔
864
            this->lf_allocator,
12,541✔
865
            line_locks,
866
        };
12,541✔
867
        sbc_tmp.sbc_value_stats.reserve(64);
12,541✔
868
        for (const auto& curr : root_formats) {
941,366✔
869
            if (this->lf_index.size()
928,825✔
870
                >= curr->lf_max_unrecognized_lines.value_or(
928,825✔
871
                    max_unrecognized_lines))
872
            {
873
                continue;
19,575✔
874
            }
875

876
            if (this->lf_mismatched_formats.count(curr->get_name()) > 0) {
928,825✔
877
                continue;
18,433✔
878
            }
879

880
            auto match_res = curr->match_name(this->lf_filename_as_string);
910,392✔
881
            if (match_res.is<log_format::name_mismatched>()) {
910,392✔
882
                auto nm = match_res.get<log_format::name_mismatched>();
1,142✔
883
                if (li.li_file_range.fr_offset == 0) {
1,142✔
884
                    log_debug("(%s) does not match file name: %s",
1,076✔
885
                              curr->get_name().get(),
886
                              this->lf_filename_as_string.c_str());
887
                }
888
                auto regex_al = attr_line_t(nm.nm_pattern);
1,142✔
889
                lnav::snippets::regex_highlighter(
1,142✔
890
                    regex_al, -1, line_range{0, (int) regex_al.length()});
1,142✔
891
                auto note = attr_line_t("pattern: ")
1,142✔
892
                                .append(regex_al)
1,142✔
893
                                .append("\n  ")
1,142✔
894
                                .append(lnav::roles::quoted_code(
2,284✔
895
                                    fmt::to_string(this->get_filename())))
2,284✔
896
                                .append("\n")
1,142✔
897
                                .append(nm.nm_partial + 2, ' ')
1,142✔
898
                                .append("^ matched up to here"_snippet_border);
1,142✔
899
                auto match_um = lnav::console::user_message::info(
1,142✔
900
                                    attr_line_t()
1,142✔
901
                                        .append(lnav::roles::identifier(
2,284✔
902
                                            curr->get_name().to_string()))
2,284✔
903
                                        .append(" file name pattern required "
1,142✔
904
                                                "by format does not match"))
905
                                    .with_note(note)
1,142✔
906
                                    .move();
1,142✔
907
                this->lf_format_match_messages.emplace_back(match_um);
1,142✔
908
                this->lf_mismatched_formats.insert(curr->get_name());
1,142✔
909
                continue;
1,142✔
910
            }
1,142✔
911
            if (this->lf_options.loo_format_name
909,250✔
912
                && !(curr->get_name()
909,250✔
913
                     == this->lf_options.loo_format_name.value()))
1,818,500✔
914
            {
915
                if (li.li_file_range.fr_offset == 0) {
×
UNCOV
916
                    log_debug("(%s) does not match file format: %s",
×
917
                              curr->get_name().get(),
918
                              fmt::to_string(this->lf_options.loo_file_format)
919
                                  .c_str());
920
                }
UNCOV
921
                continue;
×
922
            }
923

924
            scan_count += 1;
909,250✔
925
            curr->clear();
909,250✔
926
            this->set_format_base_time(curr.get(), li);
909,250✔
927
            log_format::scan_result_t scan_res{mapbox::util::no_init{}};
909,250✔
928
            if (this->lf_format != nullptr
909,250✔
929
                && this->lf_format->lf_root_format == curr.get())
909,250✔
930
            {
931
                scan_res = this->lf_format->scan(
20,958✔
932
                    *this, this->lf_index, li, sbr, sbc);
10,479✔
933
            } else {
934
                sbc_tmp.sbc_pattern_locks.pl_lines.clear();
898,771✔
935
                sbc_tmp.sbc_value_stats.clear();
898,771✔
936
                sbc_tmp.sbc_opids.los_opid_ranges.clear();
898,771✔
937
                sbc_tmp.sbc_opids.los_sub_in_use.clear();
898,771✔
938
                sbc_tmp.sbc_tids.ltis_tid_ranges.clear();
898,771✔
939
                sbc_tmp.sbc_level_cache = {};
898,771✔
940
                scan_res = curr->scan(*this, this->lf_index, li, sbr, sbc_tmp);
898,771✔
941
            }
942

943
            scan_res.match(
909,250✔
944
                [this,
909,250✔
945
                 &sbc,
946
                 &sbc_tmp,
947
                 &found,
948
                 &curr,
949
                 &best_match,
950
                 &prev_index_size,
951
                 starting_index_size](const log_format::scan_match& sm) {
952
                    if (best_match && this->lf_format != nullptr
25,683✔
953
                        && this->lf_format->lf_root_format == curr.get()
12,366✔
954
                        && best_match->first == this->lf_format.get())
25,683✔
955
                    {
956
                        prev_index_size = this->lf_index.size();
9,561✔
957
                        found = best_match->second;
9,561✔
958
                    } else if (!best_match
3,563✔
959
                               || (sm.sm_quality > best_match->second.sm_quality
6,525✔
960
                                   || (sm.sm_quality
5,924✔
961
                                           == best_match->second.sm_quality
2,962✔
962
                                       && sm.sm_strikes
332✔
963
                                           < best_match->second.sm_strikes)))
166✔
964
                    {
965
                        log_info(
601✔
966
                            "  scan with format (%s) matched with quality of "
967
                            "%d and %d strikes",
968
                            curr->get_name().c_str(),
969
                            sm.sm_quality,
970
                            sm.sm_strikes);
971

972
                        sbc.sbc_opids = sbc_tmp.sbc_opids;
601✔
973
                        sbc.sbc_tids = sbc_tmp.sbc_tids;
601✔
974
                        sbc.sbc_value_stats = sbc_tmp.sbc_value_stats;
601✔
975
                        sbc.sbc_pattern_locks = sbc_tmp.sbc_pattern_locks;
601✔
976
                        auto match_um
UNCOV
977
                            = lnav::console::user_message::info(
×
978
                                  attr_line_t()
601✔
979
                                      .append(lnav::roles::identifier(
601✔
980
                                          curr->get_name().to_string()))
1,202✔
981
                                      .append(" matched line ")
601✔
982
                                      .append(lnav::roles::number(
1,202✔
983
                                          fmt::to_string(starting_index_size))))
1,202✔
984
                                  .with_note(
1,202✔
985
                                      attr_line_t("match quality is ")
1,202✔
986
                                          .append(lnav::roles::number(
601✔
987
                                              fmt::to_string(sm.sm_quality)))
1,202✔
988
                                          .append(" with ")
601✔
989
                                          .append(lnav::roles::number(
1,202✔
990
                                              fmt::to_string(sm.sm_strikes)))
1,202✔
991
                                          .append(" strikes"))
601✔
992
                                  .move();
601✔
993
                        this->lf_format_match_messages.emplace_back(match_um);
601✔
994
                        if (best_match) {
601✔
995
                            auto starting_iter = std::next(
72✔
996
                                this->lf_index.begin(), starting_index_size);
36✔
997
                            auto last_iter = std::next(this->lf_index.begin(),
72✔
998
                                                       prev_index_size);
36✔
999
                            this->lf_index.erase(starting_iter, last_iter);
36✔
1000
                        }
1001
                        best_match = std::make_pair(curr.get(), sm);
601✔
1002
                        prev_index_size = this->lf_index.size();
601✔
1003
                    } else {
601✔
1004
                        log_trace(
2,962✔
1005
                            "  scan with format (%s) matched, but "
1006
                            "is lower quality (%d < %d) or more strikes (%d "
1007
                            "vs. %d)",
1008
                            curr->get_name().c_str(),
1009
                            sm.sm_quality,
1010
                            best_match->second.sm_quality,
1011
                            sm.sm_strikes,
1012
                            best_match->second.sm_strikes);
1013
                        while (this->lf_index.size() > prev_index_size) {
6,708✔
1014
                            this->lf_index.pop_back();
3,746✔
1015
                        }
1016
                    }
1017
                },
13,124✔
1018
                [curr](const log_format::scan_incomplete& si) {
909,250✔
1019
                    log_trace(
19✔
1020
                        "  scan with format (%s) is incomplete, "
1021
                        "more data required",
1022
                        curr->get_name().c_str());
1023
                },
19✔
1024
                [this, curr, prescan_size](
1,818,500✔
1025
                    const log_format::scan_no_match& snm) {
1026
                    if (this->lf_format == nullptr && prescan_size < 5) {
896,107✔
1027
                        log_trace(
65,126✔
1028
                            "  scan with format (%s) does not match -- %s",
1029
                            curr->get_name().c_str(),
1030
                            snm.snm_reason);
1031
                    }
1032
                });
896,107✔
1033
        }
910,392✔
1034

1035
        if (!scan_count) {
12,541✔
UNCOV
1036
            log_info("%s: no formats available to scan, no longer detecting",
×
1037
                     this->lf_filename_as_string.c_str());
UNCOV
1038
            this->lf_options.loo_detect_format = false;
×
1039
        }
1040

1041
        if (best_match
12,541✔
1042
            && (this->lf_format == nullptr
23,020✔
1043
                || ((this->lf_format->lf_root_format != best_match->first)
10,479✔
1044
                    && best_match->second.sm_quality
10,479✔
1045
                        > this->lf_format_quality)))
23,020✔
1046
        {
1047
            auto winner = best_match.value();
565✔
1048
            auto* curr = winner.first;
565✔
1049
            log_info("%s:%zu:log format found -- %s",
565✔
1050
                     this->lf_filename_as_string.c_str(),
1051
                     this->lf_index.size(),
1052
                     curr->get_name().get());
1053

1054
            auto match_um = lnav::console::user_message::ok(
1055
                attr_line_t()
565✔
1056
                    .append(lnav::roles::identifier(
565✔
1057
                        winner.first->get_name().to_string()))
1,130✔
1058
                    .append(" is the best match for line ")
565✔
1059
                    .append(lnav::roles::number(
1,130✔
1060
                        fmt::to_string(starting_index_size))));
1,695✔
1061
            this->lf_format_match_messages.emplace_back(match_um);
565✔
1062
            this->lf_text_format = text_format_t::TF_LOG;
565✔
1063
            this->lf_format = curr->specialized();
565✔
1064
            this->lf_level_stats = {};
565✔
1065
            for (const auto& ll : this->lf_index) {
1,502✔
1066
                if (ll.is_continued()) {
937✔
1067
                    continue;
117✔
1068
                }
1069
                this->lf_level_stats.update_msg_count(ll.get_msg_level());
820✔
1070
            }
1071
            this->lf_format_quality = winner.second.sm_quality;
565✔
1072
            this->set_format_base_time(this->lf_format.get(), li);
565✔
1073
            if (this->lf_format->lf_date_time.dts_fmt_lock != -1) {
565✔
1074
                this->lf_content_id
1075
                    = hasher().update(sbr.get_data(), sbr.length()).to_string();
546✔
1076
            }
1077

1078
            this->lf_applicable_taggers.clear();
565✔
1079
            for (auto& td_pair : this->lf_format->lf_tag_defs) {
578✔
1080
                bool matches = td_pair.second->ftd_paths.empty();
13✔
1081
                for (const auto& pr : td_pair.second->ftd_paths) {
18✔
1082
                    if (pr.matches(this->lf_filename_as_string.c_str())) {
5✔
1083
                        matches = true;
×
UNCOV
1084
                        break;
×
1085
                    }
1086
                }
1087
                if (!matches) {
13✔
1088
                    continue;
5✔
1089
                }
1090

1091
                log_info("%s: found applicable tag definition /%s/tags/%s",
8✔
1092
                         this->lf_filename_as_string.c_str(),
1093
                         this->lf_format->get_name().get(),
1094
                         td_pair.second->ftd_name.c_str());
1095
                this->lf_applicable_taggers.emplace_back(td_pair.second);
8✔
1096
            }
1097

1098
            this->lf_applicable_partitioners.clear();
565✔
1099
            for (auto& pd_pair : this->lf_format->lf_partition_defs) {
578✔
1100
                bool matches = pd_pair.second->fpd_paths.empty();
13✔
1101
                for (const auto& pr : pd_pair.second->fpd_paths) {
18✔
1102
                    if (pr.matches(this->lf_filename_as_string.c_str())) {
5✔
1103
                        matches = true;
×
UNCOV
1104
                        break;
×
1105
                    }
1106
                }
1107
                if (!matches) {
13✔
1108
                    continue;
5✔
1109
                }
1110

1111
                log_info(
8✔
1112
                    "%s: found applicable partition definition "
1113
                    "/%s/partitions/%s",
1114
                    this->lf_filename_as_string.c_str(),
1115
                    this->lf_format->get_name().get(),
1116
                    pd_pair.second->fpd_name.c_str());
1117
                this->lf_applicable_partitioners.emplace_back(pd_pair.second);
8✔
1118
            }
1119

1120
            /*
1121
             * We'll go ahead and assume that any previous lines were
1122
             * written out at the same time as the last one, so we need to
1123
             * go back and update everything.
1124
             */
1125
            const auto& last_line = this->lf_index.back();
565✔
1126

1127
            require_lt(starting_index_size, this->lf_index.size());
565✔
1128
            for (size_t lpc = 0; lpc < starting_index_size; lpc++) {
820✔
1129
                if (this->lf_format->lf_multiline) {
255✔
1130
                    this->lf_index[lpc].set_time(
255✔
1131
                        last_line.get_time<std::chrono::microseconds>());
1132
                    if (this->lf_format->lf_structured) {
255✔
1133
                        this->lf_index[lpc].set_ignore(true);
248✔
1134
                    }
1135
                } else {
UNCOV
1136
                    this->lf_index[lpc].set_time(
×
1137
                        last_line.get_time<std::chrono::microseconds>());
UNCOV
1138
                    this->lf_index[lpc].set_level(LEVEL_INVALID);
×
1139
                }
1140
                retval = true;
255✔
1141
            }
1142

1143
            found = best_match->second;
565✔
1144
        }
565✔
1145
    } else if (this->lf_format.get() != nullptr) {
18,898✔
1146
        if (!this->lf_index.empty()) {
3,585✔
1147
            prescan_time = this->lf_index[prescan_size - 1]
3,585✔
1148
                               .get_time<std::chrono::microseconds>();
3,585✔
1149
        }
1150
        /* We've locked onto a format, just use that scanner. */
1151
        found = this->lf_format->scan(*this, this->lf_index, li, sbr, sbc);
3,585✔
1152
    }
1153

1154
    if (found.is<log_format::scan_match>()) {
18,898✔
1155
        if (!this->lf_index.empty()) {
12,613✔
1156
            auto& last_line = this->lf_index.back();
12,613✔
1157

1158
            this->lf_level_stats.update_msg_count(last_line.get_msg_level());
12,613✔
1159
            last_line.set_valid_utf(last_line.is_valid_utf()
25,226✔
1160
                                    && li.li_utf8_scan_result.is_valid());
12,613✔
1161
            last_line.set_has_ansi(last_line.has_ansi()
25,226✔
1162
                                   || li.li_utf8_scan_result.usr_has_ansi);
12,613✔
1163
            if (last_line.get_msg_level() == LEVEL_INVALID) {
12,613✔
1164
                if (this->lf_invalid_lines.ili_lines.size()
9✔
1165
                    < invalid_line_info::MAX_INVALID_LINES)
9✔
1166
                {
1167
                    this->lf_invalid_lines.ili_lines.push_back(
18✔
1168
                        this->lf_index.size() - 1);
9✔
1169
                }
1170
                this->lf_invalid_lines.ili_total += 1;
9✔
1171
            }
1172
        }
1173
        if (prescan_size > 0 && this->lf_index.size() >= prescan_size
11,606✔
1174
            && prescan_time
24,219✔
1175
                != this->lf_index[prescan_size - 1]
23,212✔
1176
                       .get_time<std::chrono::microseconds>())
24,219✔
1177
        {
1178
            retval = true;
57✔
1179
        }
1180
        if (prescan_size > 0 && prescan_size < this->lf_index.size()) {
12,613✔
1181
            auto& second_to_last = this->lf_index[prescan_size - 1];
11,581✔
1182
            auto& latest = this->lf_index[prescan_size];
11,581✔
1183

1184
            if (!second_to_last.is_ignored() && latest < second_to_last) {
11,581✔
1185
                if (this->lf_format->lf_time_ordered) {
1,396✔
1186
                    this->lf_out_of_time_order_count += 1;
16✔
1187
                    for (size_t lpc = prescan_size; lpc < this->lf_index.size();
32✔
1188
                         lpc++)
1189
                    {
1190
                        auto& line_to_update = this->lf_index[lpc];
16✔
1191

1192
                        line_to_update.set_time_skew(true);
16✔
1193
                        line_to_update.set_time(
16✔
1194
                            second_to_last
1195
                                .get_time<std::chrono::microseconds>());
1196
                    }
1197
                } else {
1198
                    retval = true;
1,380✔
1199
                }
1200
            }
1201
        }
1202
    } else if (found.is<log_format::scan_no_match>()) {
6,285✔
1203
        log_level_t last_level = LEVEL_UNKNOWN;
6,285✔
1204
        auto last_time = this->lf_index_time;
6,285✔
1205
        auto continued = false;
6,285✔
1206

1207
        if (this->lf_format == nullptr && li.li_timestamp.tv_sec != 0) {
6,285✔
1208
            last_time = std::chrono::duration_cast<std::chrono::microseconds>(
60✔
1209
                            std::chrono::seconds{li.li_timestamp.tv_sec})
60✔
1210
                + std::chrono::microseconds(li.li_timestamp.tv_usec);
120✔
1211
            last_level = li.li_level;
60✔
1212
        } else if (!this->lf_index.empty()) {
6,225✔
1213
            const auto& ll = this->lf_index.back();
6,122✔
1214

1215
            /*
1216
             * Assume this line is part of the previous one(s) and copy the
1217
             * metadata over.
1218
             */
1219
            last_time = ll.get_time<std::chrono::microseconds>();
6,122✔
1220
            if (this->lf_format.get() != nullptr) {
6,122✔
1221
                last_level = ll.get_msg_level();
2,016✔
1222
                continued = true;
2,016✔
1223
            }
1224
        }
1225
        this->lf_index.emplace_back(
6,285✔
1226
            li.li_file_range.fr_offset, last_time, last_level);
6,285✔
1227
        auto& new_line = this->lf_index.back();
6,285✔
1228
        new_line.set_continued(continued);
6,285✔
1229
        new_line.set_valid_utf(li.li_utf8_scan_result.is_valid());
6,285✔
1230
        new_line.set_has_ansi(li.li_utf8_scan_result.usr_has_ansi);
6,285✔
1231
    }
1232

1233
    if (this->lf_format != nullptr
18,898✔
1234
        && this->lf_index.back().get_time<std::chrono::microseconds>()
48,156✔
1235
            > this->lf_options.loo_time_range.tr_end)
29,258✔
1236
    {
1237
        if (!this->lf_upper_bound_size) {
×
1238
            this->lf_upper_bound_size = this->lf_index.back().get_offset();
×
UNCOV
1239
            log_debug("%s:%zu: upper found in file found %llu",
×
1240
                      this->lf_filename_as_string.c_str(),
1241
                      this->lf_index.size(),
1242
                      this->lf_upper_bound_size.value());
1243
        }
UNCOV
1244
        this->lf_index.pop_back();
×
1245
    }
1246

1247
    return retval;
18,898✔
1248
}
18,898✔
1249

1250
logfile::rebuild_result_t
1251
logfile::rebuild_index(std::optional<ui_clock::time_point> deadline)
4,293✔
1252
{
1253
    static const auto& dts_cfg
1254
        = injector::get<const date_time_scanner_ns::config&>();
4,293✔
1255

1256
    static auto op = lnav_operation{"rebuild_file_index"};
4,293✔
1257
    auto op_guard = lnav_opid_guard::internal(op);
4,293✔
1258

1259
    if (!this->lf_invalidated_opids.empty()) {
4,293✔
UNCOV
1260
        auto writeOpids = this->lf_opids.writeAccess();
×
1261

1262
        for (auto bm_pair : this->lf_bookmark_metadata) {
×
1263
            if (bm_pair.second.bm_opid.empty()) {
×
UNCOV
1264
                continue;
×
1265
            }
1266

1267
            if (!this->lf_invalidated_opids.contains(bm_pair.second.bm_opid)) {
×
UNCOV
1268
                continue;
×
1269
            }
1270

1271
            auto opid_iter
1272
                = writeOpids->los_opid_ranges.find(bm_pair.second.bm_opid);
×
1273
            if (opid_iter == writeOpids->los_opid_ranges.end()) {
×
UNCOV
1274
                log_warning("opid not in ranges: %s",
×
1275
                            bm_pair.second.bm_opid.c_str());
UNCOV
1276
                continue;
×
1277
            }
1278

1279
            if (bm_pair.first >= this->lf_index.size()) {
×
1280
                log_warning("stale bookmark: %d", bm_pair.first);
×
UNCOV
1281
                continue;
×
1282
            }
1283

1284
            auto& ll = this->lf_index[bm_pair.first];
×
1285
            opid_iter->second.otr_range.extend_to(
×
1286
                ll.get_time<std::chrono::microseconds>());
×
UNCOV
1287
            opid_iter->second.otr_level_stats.update_msg_count(
×
1288
                ll.get_msg_level());
1289
        }
UNCOV
1290
        this->lf_invalidated_opids.clear();
×
1291
    }
1292

1293
    if (!this->lf_indexing) {
4,293✔
1294
        if (this->lf_sort_needed) {
19✔
1295
            this->lf_sort_needed = false;
×
UNCOV
1296
            return rebuild_result_t::NEW_ORDER;
×
1297
        }
1298
        return rebuild_result_t::NO_NEW_LINES;
19✔
1299
    }
1300

1301
    if (this->file_options_have_changed()
4,274✔
1302
        || (this->lf_format != nullptr
7,331✔
1303
            && (this->lf_zoned_to_local_state != dts_cfg.c_zoned_to_local
3,057✔
1304
                || this->lf_format->format_changed())))
3,057✔
1305
    {
1306
        log_info("%s: format has changed, rebuilding",
5✔
1307
                 this->lf_filename_as_string.c_str());
1308
        this->lf_index.clear();
5✔
1309
        this->lf_index_size = 0;
5✔
1310
        this->lf_partial_line = false;
5✔
1311
        this->lf_longest_line = 0;
5✔
1312
        this->lf_sort_needed = true;
5✔
1313
        this->lf_pattern_locks.pl_lines.clear();
5✔
1314
        this->lf_value_stats.clear();
5✔
1315
        {
1316
            safe::WriteAccess<safe_opid_state> writable_opid_map(
1317
                this->lf_opids);
5✔
1318

1319
            writable_opid_map->los_opid_ranges.clear();
5✔
1320
            writable_opid_map->los_sub_in_use.clear();
5✔
1321
        }
5✔
1322
        {
1323
            auto tids = this->lf_thread_ids.writeAccess();
5✔
1324

1325
            tids->ltis_tid_ranges.clear();
5✔
1326
        }
5✔
1327
        this->lf_allocator.reset();
5✔
1328
        if (this->lf_logline_observer) {
5✔
1329
            this->lf_logline_observer->logline_clear(*this);
5✔
1330
        }
1331
    }
1332
    this->lf_zoned_to_local_state = dts_cfg.c_zoned_to_local;
4,274✔
1333

1334
    auto retval = rebuild_result_t::NO_NEW_LINES;
4,274✔
1335
    struct stat st;
1336

1337
    this->lf_activity.la_polls += 1;
4,274✔
1338

1339
    if (fstat(this->lf_line_buffer.get_fd(), &st) == -1) {
4,274✔
1340
        if (errno == EINTR) {
×
UNCOV
1341
            return rebuild_result_t::NO_NEW_LINES;
×
1342
        }
UNCOV
1343
        return rebuild_result_t::INVALID;
×
1344
    }
1345

1346
    const auto is_truncated = st.st_size < this->lf_stat.st_size;
4,274✔
1347
    const auto is_user_provided_and_rewritten = (
4,274✔
1348
        // files from other sources can have their mtimes monkeyed with
1349
        this->lf_options.loo_source == logfile_name_source::USER
4,274✔
1350
        && this->lf_stat.st_size == st.st_size
4,274✔
1351
        && this->lf_stat.st_mtime != st.st_mtime);
8,548✔
1352

1353
    // Check the previous stat against the last to see if things are wonky.
1354
    if (this->lf_named_file && (is_truncated || is_user_provided_and_rewritten))
4,274✔
1355
    {
1356
        auto is_overwritten = true;
1✔
1357
        if (this->lf_format != nullptr) {
1✔
1358
            const auto first_line = this->lf_index.begin();
1✔
1359
            const auto first_line_range
1360
                = this->get_file_range(first_line, false);
1✔
1361
            auto read_res = this->read_range(first_line_range);
1✔
1362
            if (read_res.isOk()) {
1✔
1363
                auto sbr = read_res.unwrap();
1✔
1364
                if (first_line->has_ansi()) {
1✔
UNCOV
1365
                    sbr.erase_ansi();
×
1366
                }
1367
                auto curr_content_id
1368
                    = hasher().update(sbr.get_data(), sbr.length()).to_string();
1✔
1369

1370
                log_info(
1✔
1371
                    "%s: overwrite content_id double check: old:%s; now:%s",
1372
                    this->lf_filename_as_string.c_str(),
1373
                    this->lf_content_id.c_str(),
1374
                    curr_content_id.c_str());
1375
                if (this->lf_content_id == curr_content_id) {
1✔
1376
                    is_overwritten = false;
1✔
1377
                }
1378
            } else {
1✔
1379
                auto errmsg = read_res.unwrapErr();
×
UNCOV
1380
                log_error("unable to read first line for overwrite check: %s",
×
1381
                          errmsg.c_str());
1382
            }
1383
        }
1✔
1384

1385
        if (is_truncated || is_overwritten) {
1✔
1386
            log_info("overwritten file detected, closing -- %s  new: %" PRId64
1✔
1387
                     "/%" PRId64 "  old: %" PRId64 "/%" PRId64,
1388
                     this->lf_filename_as_string.c_str(),
1389
                     st.st_size,
1390
                     st.st_mtime,
1391
                     this->lf_stat.st_size,
1392
                     this->lf_stat.st_mtime);
1393
            this->close();
1✔
1394
            return rebuild_result_t::NO_NEW_LINES;
1✔
1395
        }
1396
    }
1397

1398
    if (this->lf_text_format == text_format_t::TF_BINARY) {
4,273✔
1399
        this->lf_index_size = st.st_size;
22✔
1400
        this->lf_stat = st;
22✔
1401
    } else if (this->lf_upper_bound_size) {
4,251✔
1402
        this->lf_index_size = this->get_content_size();
×
UNCOV
1403
        this->lf_stat = st;
×
1404
    } else if (this->lf_line_buffer.is_data_available(this->lf_index_size,
4,251✔
1405
                                                      st.st_size))
1406
    {
1407
        this->lf_activity.la_reads += 1;
1,177✔
1408

1409
        // We haven't reached the end of the file.  Note that we use the
1410
        // line buffer's notion of the file size since it may be compressed.
1411
        bool has_format = this->lf_format.get() != nullptr;
1,177✔
1412
        struct rusage begin_rusage;
1413
        file_off_t off;
1414
        size_t begin_size = this->lf_index.size();
1,177✔
1415
        bool record_rusage = this->lf_index.size() == 1;
1,177✔
1416
        off_t begin_index_size = this->lf_index_size;
1,177✔
1417
        size_t rollback_size = 0, rollback_index_start = 0;
1,177✔
1418

1419
        if (record_rusage) {
1,177✔
1420
            getrusage(RUSAGE_SELF, &begin_rusage);
453✔
1421
        }
1422

1423
        if (begin_size == 0 && !has_format) {
1,177✔
1424
            log_debug("scanning file... fd(%d) %s",
652✔
1425
                      this->lf_line_buffer.get_fd(),
1426
                      this->lf_filename_as_string.c_str());
1427
        }
1428

1429
        if (!this->lf_index.empty()) {
1,177✔
1430
            off = this->lf_index.back().get_offset();
518✔
1431

1432
            /*
1433
             * Drop the last line we read since it might have been a partial
1434
             * read.
1435
             */
1436
            while (this->lf_index.back().get_sub_offset() != 0) {
630✔
1437
                this->lf_index.pop_back();
112✔
1438
                rollback_size += 1;
112✔
1439
            }
1440
            this->lf_index.pop_back();
518✔
1441
            rollback_index_start = this->lf_index.size();
518✔
1442
            rollback_size += 1;
518✔
1443

1444
            if (!this->lf_index.empty()) {
518✔
1445
                auto last_line = std::prev(this->lf_index.end());
42✔
1446
                if (last_line != this->lf_index.begin()) {
42✔
1447
                    auto prev_line = std::prev(last_line);
41✔
1448
                    this->lf_line_buffer.flush_at(prev_line->get_offset());
41✔
1449
                    auto prev_len_res
1450
                        = this->message_byte_length(prev_line, false);
41✔
1451

1452
                    auto read_result = this->lf_line_buffer.read_range({
1453
                        prev_line->get_offset(),
41✔
1454
                        prev_len_res.mlr_length + 1,
41✔
1455
                    });
82✔
1456
                    if (read_result.isErr()) {
41✔
UNCOV
1457
                        log_info(
×
1458
                            "overwritten file detected, closing -- %s (%s)",
1459
                            this->lf_filename_as_string.c_str(),
1460
                            read_result.unwrapErr().c_str());
1461
                        this->close();
×
UNCOV
1462
                        return rebuild_result_t::INVALID;
×
1463
                    }
1464

1465
                    auto sbr = read_result.unwrap();
41✔
1466
                    if (!sbr.to_string_fragment().endswith("\n")) {
41✔
UNCOV
1467
                        log_info("overwritten file detected, closing -- %s",
×
1468
                                 this->lf_filename_as_string.c_str());
1469
                        this->close();
×
UNCOV
1470
                        return rebuild_result_t::INVALID;
×
1471
                    }
1472
                } else {
41✔
1473
                    this->lf_line_buffer.flush_at(last_line->get_offset());
1✔
1474
                }
1475
                auto last_length_res
1476
                    = this->message_byte_length(last_line, false);
42✔
1477

1478
                auto read_result = this->lf_line_buffer.read_range({
1479
                    last_line->get_offset(),
42✔
1480
                    last_length_res.mlr_length,
42✔
1481
                });
84✔
1482

1483
                if (read_result.isErr()) {
42✔
UNCOV
1484
                    log_info("overwritten file detected, closing -- %s (%s)",
×
1485
                             this->lf_filename_as_string.c_str(),
1486
                             read_result.unwrapErr().c_str());
1487
                    this->close();
×
UNCOV
1488
                    return rebuild_result_t::INVALID;
×
1489
                }
1490
            } else {
42✔
1491
                this->lf_line_buffer.flush_at(0);
476✔
1492
            }
1493
        } else {
1494
            this->lf_line_buffer.flush_at(0);
659✔
1495
            off = this->lf_index_size;
659✔
1496
        }
1497
        if (this->lf_logline_observer != nullptr) {
1,177✔
1498
            this->lf_logline_observer->logline_restart(*this, rollback_size);
1,088✔
1499
        }
1500

1501
        bool sort_needed = std::exchange(this->lf_sort_needed, false);
1,177✔
1502
        size_t limit = SIZE_MAX;
1,177✔
1503

1504
        if (deadline) {
1,177✔
1505
            if (ui_clock::now() > deadline.value()) {
2✔
1506
                if (has_format) {
×
UNCOV
1507
                    log_warning("with format ran past deadline! -- %s",
×
1508
                                this->lf_filename_as_string.c_str());
UNCOV
1509
                    limit = 1000;
×
1510
                } else {
UNCOV
1511
                    limit = 100;
×
1512
                }
1513
            } else if (this->lf_options.loo_detect_format
4✔
1514
                       && (!has_format
3✔
1515
                           || (this->lf_options.loo_time_range.has_bounds()
1✔
UNCOV
1516
                               && this->lf_file_size_at_map_time == 0)))
×
1517
            {
1518
                limit = 1000;
1✔
1519
            } else {
1520
                limit = 1000 * 1000;
1✔
1521
            }
1522
        }
1523
        if (!has_format) {
1,177✔
1524
            log_debug("loading file... %s:%zu",
652✔
1525
                      this->lf_filename_as_string.c_str(),
1526
                      begin_size);
1527
        }
1528
        scan_batch_context sbc{this->lf_allocator, this->lf_pattern_locks};
1,177✔
1529
        sbc.sbc_opids.los_opid_ranges.reserve(32);
1,177✔
1530
        sbc.sbc_tids.ltis_tid_ranges.reserve(8);
1,177✔
1531
        auto prev_range = file_range{off};
1,177✔
1532
        while (limit > 0) {
19,489✔
1533
            auto load_result = this->lf_line_buffer.load_next_line(prev_range);
19,489✔
1534
            if (load_result.isErr()) {
19,489✔
UNCOV
1535
                log_error("%s: load next line failure -- %s",
×
1536
                          this->lf_filename_as_string.c_str(),
1537
                          load_result.unwrapErr().c_str());
1538
                this->close();
×
UNCOV
1539
                return rebuild_result_t::INVALID;
×
1540
            }
1541

1542
            auto li = load_result.unwrap();
19,489✔
1543
            if (li.li_file_range.empty()) {
19,489✔
1544
                break;
591✔
1545
            }
1546
            prev_range = li.li_file_range;
18,898✔
1547

1548
            auto read_result
1549
                = this->lf_line_buffer.read_range(li.li_file_range);
18,898✔
1550
            if (read_result.isErr()) {
18,898✔
UNCOV
1551
                log_error("%s:read failure -- %s",
×
1552
                          this->lf_filename_as_string.c_str(),
1553
                          read_result.unwrapErr().c_str());
1554
                this->close();
×
UNCOV
1555
                return rebuild_result_t::INVALID;
×
1556
            }
1557

1558
            auto sbr = read_result.unwrap();
18,898✔
1559

1560
            if (this->lf_format == nullptr
18,898✔
1561
                && !this->lf_options.loo_non_utf_is_visible
4,834✔
1562
                && !li.li_utf8_scan_result.is_valid())
23,732✔
1563
            {
UNCOV
1564
                log_info("file is not utf, hiding: %s",
×
1565
                         this->lf_filename_as_string.c_str());
1566
                this->lf_indexing = false;
×
1567
                this->lf_options.loo_is_visible = false;
×
1568
                attr_line_t hex;
×
1569
                attr_line_builder alb(hex);
×
UNCOV
1570
                alb.append_as_hexdump(sbr.to_string_fragment());
×
1571
                auto snip = lnav::console::snippet::from(
1572
                    source_location{
1573
                        intern_string::lookup(this->lf_filename),
UNCOV
1574
                        (int) this->lf_index.size() + 1,
×
1575
                    },
UNCOV
1576
                    hex);
×
1577
                auto note_um
1578
                    = lnav::console::user_message::warning(
×
1579
                          attr_line_t("skipping indexing for ")
×
1580
                              .append_quoted(this->lf_filename))
×
1581
                          .with_reason("File contains invalid UTF-8")
×
1582
                          .with_note(
×
1583
                              attr_line_t(li.li_utf8_scan_result.usr_message)
×
1584
                                  .append(" at line ")
×
1585
                                  .append(lnav::roles::number(fmt::to_string(
×
1586
                                      this->lf_index.size() + 1)))
×
1587
                                  .append(" column ")
×
UNCOV
1588
                                  .append(lnav::roles::number(fmt::to_string(
×
1589
                                      li.li_utf8_scan_result.usr_valid_frag
1590
                                          .sf_end))))
1591
                          .with_snippet(snip)
×
1592
                          .move();
×
UNCOV
1593
                this->lf_notes.writeAccess()->insert(note_type::not_utf,
×
1594
                                                     note_um);
1595
                if (this->lf_logfile_observer != nullptr) {
×
UNCOV
1596
                    this->lf_logfile_observer->logfile_indexing(this, 0, 0);
×
1597
                }
UNCOV
1598
                break;
×
1599
            }
1600
            size_t old_size = this->lf_index.size();
18,898✔
1601

1602
            if (old_size == 0
18,898✔
1603
                && this->lf_text_format == text_format_t::TF_UNKNOWN)
1,135✔
1604
            {
1605
                auto fr = this->lf_line_buffer.get_available();
637✔
1606
                auto avail_data = this->lf_line_buffer.read_range(fr);
637✔
1607

1608
                this->lf_text_format
1609
                    = avail_data
637✔
1610
                          .map([path = this->get_path(),
1,274✔
1611
                                this](const shared_buffer_ref& avail_sbr)
1612
                                   -> text_format_t {
1613
                              constexpr auto DETECT_LIMIT = 16 * 1024;
637✔
1614
                              auto sbr_str = to_string(avail_sbr);
637✔
1615
                              if (sbr_str.size() > DETECT_LIMIT) {
637✔
1616
                                  sbr_str.resize(DETECT_LIMIT);
35✔
1617
                              }
1618

1619
                              if (this->lf_line_buffer.is_piper()) {
637✔
1620
                                  auto lines
1621
                                      = string_fragment::from_str(sbr_str)
45✔
1622
                                            .split_lines();
45✔
1623
                                  for (auto line_iter = lines.rbegin();
45✔
1624
                                       // XXX rejigger read_range() for
1625
                                       // multi-line reads
1626
                                       std::next(line_iter) != lines.rend();
312✔
1627
                                       ++line_iter)
111✔
1628
                                  {
1629
                                      sbr_str.erase(line_iter->sf_begin, 22);
111✔
1630
                                  }
1631
                              }
45✔
1632
                              auto utf8_res = is_utf8(sbr_str);
637✔
1633
                              if (!utf8_res.is_valid()) {
637✔
1634
                                  return text_format_t::TF_BINARY;
5✔
1635
                              }
1636
                              if (utf8_res.usr_has_ansi) {
632✔
1637
                                  auto new_size = erase_ansi_escapes(sbr_str);
16✔
1638
                                  sbr_str.resize(new_size);
16✔
1639
                              }
1640
                              return detect_text_format(sbr_str, path);
632✔
1641
                          })
637✔
1642
                          .unwrapOr(text_format_t::TF_UNKNOWN);
637✔
1643
                log_debug("setting text format to %s",
637✔
1644
                          fmt::to_string(this->lf_text_format).c_str());
1645
                switch (this->lf_text_format) {
637✔
1646
                    case text_format_t::TF_DIFF:
18✔
1647
                    case text_format_t::TF_MAN:
1648
                    case text_format_t::TF_MARKDOWN:
1649
                        log_debug(
18✔
1650
                            "  file is text, disabling log format detection");
1651
                        this->lf_options.loo_detect_format = false;
18✔
1652
                        break;
18✔
1653
                    default:
619✔
1654
                        break;
619✔
1655
                }
1656
            }
637✔
1657

1658
            if (!li.li_utf8_scan_result.is_valid()) {
18,898✔
1659
                log_warning(
53✔
1660
                    "%s: invalid UTF-8 detected at L%zu:C%d/%lld (O:%lld) -- "
1661
                    "%s",
1662
                    this->lf_filename_as_string.c_str(),
1663
                    this->lf_index.size() + 1,
1664
                    li.li_utf8_scan_result.usr_valid_frag.sf_end,
1665
                    li.li_file_range.fr_size,
1666
                    li.li_file_range.fr_offset,
1667
                    li.li_utf8_scan_result.usr_message);
1668
                if (lnav_log_level <= lnav_log_level_t::TRACE) {
53✔
1669
                    attr_line_t al;
×
1670
                    attr_line_builder alb(al);
×
1671
                    alb.append_as_hexdump(
×
1672
                        sbr.to_string_fragment().sub_range(0, 256));
×
UNCOV
1673
                    log_warning("  dump: %s", al.al_string.c_str());
×
1674
                }
1675
            }
1676

1677
            sbr.rtrim(is_line_ending);
18,898✔
1678

1679
            if (li.li_utf8_scan_result.is_valid()
18,898✔
1680
                && li.li_utf8_scan_result.usr_has_ansi)
18,898✔
1681
            {
1682
                sbr.erase_ansi();
105✔
1683
            }
1684

1685
            this->lf_longest_line
1686
                = std::max(this->lf_longest_line,
18,898✔
1687
                           li.li_utf8_scan_result.usr_column_width_guess);
1688
            this->lf_partial_line = li.li_partial;
18,898✔
1689
            sort_needed = this->process_prefix(sbr, li, sbc) || sort_needed;
18,898✔
1690

1691
            if (old_size > this->lf_index.size()) {
18,898✔
UNCOV
1692
                old_size = 0;
×
1693
            }
1694

1695
            // Update this early so that line_length() works
1696
            this->lf_index_size = li.li_file_range.next_offset();
18,898✔
1697

1698
            if (this->lf_logline_observer != nullptr) {
18,898✔
1699
                auto nl_rc = this->lf_logline_observer->logline_new_lines(
36,514✔
1700
                    *this, this->begin() + old_size, this->end(), sbr);
36,514✔
1701
                if (rollback_size > 0 && old_size == rollback_index_start
18,257✔
1702
                    && nl_rc)
474✔
1703
                {
1704
                    log_debug(
3✔
1705
                        "%s: rollbacked line %zu matched filter, forcing "
1706
                        "full sort",
1707
                        this->lf_filename_as_string.c_str(),
1708
                        rollback_index_start);
1709
                    sort_needed = true;
3✔
1710
                }
1711
            }
1712

1713
            if (this->lf_logfile_observer != nullptr) {
18,898✔
1714
                auto indexing_res = this->lf_logfile_observer->logfile_indexing(
18,257✔
1715
                    this,
1716
                    this->lf_line_buffer.get_read_offset(
1717
                        li.li_file_range.next_offset()),
1718
                    this->get_content_size());
1719

1720
                if (indexing_res == lnav::progress_result_t::interrupt) {
18,257✔
UNCOV
1721
                    break;
×
1722
                }
1723
            }
1724

1725
            if (!has_format && this->lf_format != nullptr) {
18,898✔
1726
                break;
565✔
1727
            }
1728
            if (begin_size == 0 && !has_format
18,333✔
1729
                && li.li_file_range.fr_offset > 16 * 1024)
4,269✔
1730
            {
1731
                break;
1✔
1732
            }
1733
#if 0
1734
            if (this->lf_line_buffer.is_likely_to_flush(prev_range)
1735
                && this->lf_index.size() - begin_size > 1)
1736
            {
1737
                log_debug("likely to flush, breaking");
1738
                break;
1739
            }
1740
#endif
1741
            if (this->lf_format) {
18,332✔
1742
                auto sf = sbr.to_string_fragment();
14,064✔
1743

1744
                for (const auto& td : this->lf_applicable_taggers) {
14,208✔
1745
                    auto curr_ll = this->end() - 1;
144✔
1746

1747
                    if (td->ftd_level != LEVEL_UNKNOWN
144✔
1748
                        && td->ftd_level != curr_ll->get_msg_level())
144✔
1749
                    {
UNCOV
1750
                        continue;
×
1751
                    }
1752

1753
                    if (td->ftd_pattern.pp_value
144✔
1754
                            ->find_in(sf, PCRE2_NO_UTF_CHECK)
288✔
1755
                            .ignore_error()
288✔
1756
                            .has_value())
144✔
1757
                    {
1758
                        while (curr_ll->is_continued()) {
4✔
UNCOV
1759
                            --curr_ll;
×
1760
                        }
1761
                        curr_ll->set_meta_mark(true);
4✔
1762
                        auto line_number = static_cast<uint32_t>(
1763
                            std::distance(this->begin(), curr_ll));
4✔
1764

1765
                        this->lf_bookmark_metadata[line_number].add_tag(
4✔
1766
                            td->ftd_name);
4✔
1767
                    }
1768
                }
1769

1770
                for (const auto& pd : this->lf_applicable_partitioners) {
14,183✔
1771
                    thread_local auto part_md
1772
                        = lnav::pcre2pp::match_data::unitialized();
119✔
1773

1774
                    auto curr_ll = this->end() - 1;
119✔
1775

1776
                    if (pd->fpd_level != LEVEL_UNKNOWN
119✔
1777
                        && pd->fpd_level != curr_ll->get_msg_level())
119✔
1778
                    {
UNCOV
1779
                        continue;
×
1780
                    }
1781

1782
                    auto match_res = pd->fpd_pattern.pp_value->capture_from(sf)
119✔
1783
                                         .into(part_md)
119✔
1784
                                         .matches(PCRE2_NO_UTF_CHECK)
238✔
1785
                                         .ignore_error();
119✔
1786
                    if (match_res) {
119✔
1787
                        while (curr_ll->is_continued()) {
8✔
UNCOV
1788
                            --curr_ll;
×
1789
                        }
1790
                        curr_ll->set_meta_mark(true);
8✔
1791
                        auto line_number = static_cast<uint32_t>(
1792
                            std::distance(this->begin(), curr_ll));
8✔
1793

1794
                        this->lf_bookmark_metadata[line_number].bm_name
8✔
1795
                            = part_md.to_string();
16✔
1796
                    }
1797
                }
1798

1799
                if (!this->back().is_continued()) {
14,064✔
1800
                    lnav::log::watch::eval_with(*this, this->end() - 1);
11,739✔
1801
                }
1802
            }
1803

1804
            if (li.li_partial) {
18,332✔
1805
                // The last read was at the end of the file, so break.  We'll
1806
                // need to cycle back around to pop off this partial line in
1807
                // order to continue reading correctly.
1808
                break;
20✔
1809
            }
1810

1811
            if (this->lf_upper_bound_size) {
18,312✔
UNCOV
1812
                break;
×
1813
            }
1814

1815
            limit -= 1;
18,312✔
1816
        }
20,661✔
1817

1818
        if (this->lf_format == nullptr
1,177✔
1819
            && this->lf_options.loo_visible_size_limit > 0
87✔
UNCOV
1820
            && prev_range.fr_offset > 256 * 1024
×
1821
            && st.st_size >= this->lf_options.loo_visible_size_limit)
1,264✔
1822
        {
UNCOV
1823
            log_info("file has unknown format and is too large: %s",
×
1824
                     this->lf_filename_as_string.c_str());
UNCOV
1825
            this->lf_indexing = false;
×
1826
            auto note_um
UNCOV
1827
                = lnav::console::user_message::warning(
×
1828
                      "skipping indexing for file")
UNCOV
1829
                      .with_reason(
×
1830
                          "file is large and has no discernible log format")
1831
                      .move();
×
UNCOV
1832
            this->lf_notes.writeAccess()->insert(note_type::indexing_disabled,
×
1833
                                                 note_um);
1834
            if (this->lf_logfile_observer != nullptr) {
×
UNCOV
1835
                this->lf_logfile_observer->logfile_indexing(this, 0, 0);
×
1836
            }
1837
        }
1838

1839
        if (this->lf_logline_observer != nullptr) {
1,177✔
1840
            this->lf_logline_observer->logline_eof(*this);
1,088✔
1841
        }
1842

1843
        if (record_rusage
1,177✔
1844
            && (prev_range.fr_offset - begin_index_size) > (500 * 1024))
453✔
1845
        {
1846
            rusage end_rusage;
1847

1848
            getrusage(RUSAGE_SELF, &end_rusage);
×
UNCOV
1849
            rusagesub(end_rusage,
×
1850
                      begin_rusage,
1851
                      this->lf_activity.la_initial_index_rusage);
×
UNCOV
1852
            log_info("Resource usage for initial indexing of file: %s:%zu-%zu",
×
1853
                     this->lf_filename_as_string.c_str(),
1854
                     begin_size,
1855
                     this->lf_index.size());
UNCOV
1856
            log_rusage(lnav_log_level_t::INFO,
×
1857
                       this->lf_activity.la_initial_index_rusage);
1858
        }
1859

1860
        /*
1861
         * The file can still grow between the above fstat and when we're
1862
         * doing the scanning, so use the line buffer's notion of the file
1863
         * size.
1864
         */
1865
        this->lf_index_size = prev_range.next_offset();
1,177✔
1866
        this->lf_stat = st;
1,177✔
1867

1868
        this->lf_value_stats.resize(sbc.sbc_value_stats.size());
1,177✔
1869
        for (size_t lpc = 0; lpc < sbc.sbc_value_stats.size(); lpc++) {
16,158✔
1870
            this->lf_value_stats[lpc].merge(sbc.sbc_value_stats[lpc]);
14,981✔
1871
        }
1872
        {
1873
            safe::WriteAccess<safe_opid_state> writable_opid_map(
1874
                this->lf_opids);
1,177✔
1875

1876
            for (const auto& opid_pair : sbc.sbc_opids.los_opid_ranges) {
4,759✔
1877
                auto opid_iter
1878
                    = writable_opid_map->los_opid_ranges.find(opid_pair.first);
3,582✔
1879

1880
                if (opid_iter == writable_opid_map->los_opid_ranges.end()) {
3,582✔
1881
                    writable_opid_map->los_opid_ranges.emplace(opid_pair);
3,219✔
1882
                } else {
1883
                    opid_iter->second |= opid_pair.second;
363✔
1884
                }
1885
            }
1886
            log_debug(
1,177✔
1887
                "%s: opid_map size: count=%zu; sizeof(otr)=%zu; alloc=%zu",
1888
                this->lf_filename_as_string.c_str(),
1889
                writable_opid_map->los_opid_ranges.size(),
1890
                sizeof(opid_time_range),
1891
                this->lf_allocator.getNumBytesAllocated());
1892
        }
1,177✔
1893
        {
1894
            auto tids = this->lf_thread_ids.writeAccess();
1,177✔
1895

1896
            for (const auto& tid_pair : sbc.sbc_tids.ltis_tid_ranges) {
2,452✔
1897
                auto tid_iter = tids->ltis_tid_ranges.find(tid_pair.first);
1,275✔
1898
                if (tid_iter == tids->ltis_tid_ranges.end()) {
1,275✔
1899
                    tids->ltis_tid_ranges.emplace(tid_pair);
800✔
1900
                } else {
1901
                    tid_iter->second |= tid_pair.second;
475✔
1902
                }
1903
            }
1904
            log_debug("%s: tid_map size: count=%zu; sizeof(otr)=%zu; alloc=%zu",
1,177✔
1905
                      this->lf_filename_as_string.c_str(),
1906
                      tids->ltis_tid_ranges.size(),
1907
                      sizeof(opid_time_range),
1908
                      this->lf_allocator.getNumBytesAllocated());
1909
        }
1,177✔
1910

1911
        if (begin_size > this->lf_index.size()) {
1,177✔
UNCOV
1912
            log_info("overwritten file detected, closing -- %s",
×
1913
                     this->lf_filename_as_string.c_str());
1914
            this->close();
×
UNCOV
1915
            return rebuild_result_t::INVALID;
×
1916
        }
1917

1918
        if (sort_needed || begin_size > this->lf_index.size()) {
1,177✔
1919
            retval = rebuild_result_t::NEW_ORDER;
96✔
1920
        } else {
1921
            retval = rebuild_result_t::NEW_LINES;
1,081✔
1922
        }
1923

1924
        {
1925
            auto est_rem = this->estimated_remaining_lines();
1,177✔
1926
            if (est_rem > 0) {
1,177✔
1927
                this->lf_index.reserve(this->lf_index.size() + est_rem);
477✔
1928
            }
1929
        }
1930

1931
        if (this->lf_format != nullptr
1,177✔
1932
            && this->lf_options.loo_time_range.has_bounds()
1,090✔
1933
            && (this->lf_index.size() >= RETRY_MATCH_SIZE
6✔
1934
                || this->lf_index_size == this->get_content_size())
6✔
1935
            && this->lf_file_size_at_map_time != this->get_content_size())
2,267✔
1936
        {
1937
            switch (this->build_content_map()) {
3✔
1938
                case rebuild_result_t::NEW_ORDER:
2✔
1939
                    retval = rebuild_result_t::NEW_ORDER;
2✔
1940
                    break;
2✔
1941
                default:
1✔
1942
                    break;
1✔
1943
            }
1944
        }
1945

1946
        for (auto& lvs : this->lf_value_stats) {
16,129✔
1947
            {
1948
                lvs.lvs_tdigest.merge();
14,952✔
1949
                auto p25 = lvs.lvs_tdigest.quantile(25);
14,952✔
1950
                auto p50 = lvs.lvs_tdigest.quantile(50);
14,952✔
1951
                auto p75 = lvs.lvs_tdigest.quantile(75);
14,952✔
1952
                log_debug("stats[] p25=%f p50=%f p75=%f", p25, p50, p75);
14,952✔
1953
            }
1954
        }
1955
    } else {
1,177✔
1956
        this->lf_stat = st;
3,074✔
1957
        if (this->lf_sort_needed) {
3,074✔
1958
            retval = rebuild_result_t::NEW_ORDER;
13✔
1959
            this->lf_sort_needed = false;
13✔
1960
        }
1961
    }
1962

1963
    this->lf_index_time
1964
        = std::chrono::seconds{this->lf_line_buffer.get_file_time()};
4,273✔
1965
    if (this->lf_index_time.count() == 0) {
4,273✔
1966
        this->lf_index_time = std::chrono::seconds{st.st_mtime};
4,251✔
1967
    }
1968

1969
    if (this->lf_out_of_time_order_count) {
4,273✔
1970
        log_info("Detected %d out-of-time-order lines in file: %s",
8✔
1971
                 this->lf_out_of_time_order_count,
1972
                 this->lf_filename_as_string.c_str());
1973
        this->lf_out_of_time_order_count = 0;
8✔
1974
    }
1975

1976
    return retval;
4,273✔
1977
}
4,293✔
1978

1979
/**
 * Read the content of the indexed line referenced by @p ll.
 *
 * The bytes for the line are fetched from the line buffer, trailing
 * line-ending characters are trimmed, and the data is scrubbed to UTF-8 when
 * the range metadata does not flag it as valid.  If a format has been set for
 * this file, the buffer is then handed to the format's get_subline().
 *
 * @param ll   Iterator for the line to read.
 * @param opts Options forwarded to the format's get_subline().
 * @return The processed buffer, the error produced by the line buffer's
 *         read_range(), or the message for the error code carried by a
 *         thrown line_buffer::error.
 */
Result<shared_buffer_ref, std::string>
logfile::read_line(iterator ll, subline_options opts)
{
    try {
        const auto line_range = this->get_file_range(ll, false);

        // Applied to the buffer only when the read succeeds.
        auto post_process = [&](auto sbr) {
            sbr.rtrim(is_line_ending);

            const bool needs_scrub = !line_range.fr_metadata.m_valid_utf;
            if (needs_scrub) {
                scrub_to_utf8(sbr.get_writable_data(), sbr.length());
                sbr.get_metadata().m_valid_utf = true;
            }

            if (this->lf_format) {
                this->lf_format->get_subline(
                    {this->lf_value_stats, this->lf_pattern_locks},
                    *ll,
                    sbr,
                    opts);
            }

            return sbr;
        };

        return this->lf_line_buffer.read_range(line_range).map(post_process);
    } catch (const line_buffer::error& e) {
        // Translate the error code from the exception into its message text.
        const std::error_code ec{e.e_err, std::generic_category()};

        return Err(ec.message());
    }
}
2006

2007
/**
 * Read every indexed line of the file into a single string.
 *
 * Lines are read one at a time through the line buffer and joined with
 * newlines.  A newline is only added while the accumulated content is still
 * shorter than the size recorded in the last stat of the file.
 *
 * @param format When read_format_t::with_framing, the content is prefixed
 *               with filler bytes covering the piper header size and, for
 *               piper-backed files, each line is preceded by a fixed run of
 *               filler bytes.
 * @return The accumulated content along with the merged range metadata, or
 *         an error if the file is larger than the line buffer limit or a
 *         read fails.
 */
Result<logfile::read_file_result, std::string>
logfile::read_file(read_format_t format)
{
    // Filler byte and per-line pad length used when framing is requested.
    // NOTE(review): the pad presumably stands in for the per-line piper
    // header -- confirm against the piper implementation.
    constexpr char FRAMING_FILL = '\x16';
    constexpr size_t FRAMING_LINE_PAD = 22;

    const auto stat_size = this->lf_stat.st_size;
    if (stat_size > line_buffer::MAX_LINE_BUFFER_SIZE) {
        return Err(std::string("file is too large to read"));
    }

    const bool framed = (format == read_format_t::with_framing);
    auto retval = read_file_result{};
    auto& content = retval.rfr_content;

    content.reserve(stat_size);
    if (framed) {
        content.append(this->lf_line_buffer.get_piper_header_size(),
                       FRAMING_FILL);
    }

    auto iter = this->begin();
    while (iter != this->end()) {
        const auto fr = this->get_file_range(iter);

        retval.rfr_range.fr_metadata |= fr.fr_metadata;
        retval.rfr_range.fr_size = fr.next_offset();

        // TRY returns from this function with the error if the read fails.
        auto sbr = TRY(this->lf_line_buffer.read_range(fr));

        if (framed && this->lf_line_buffer.is_piper()) {
            content.append(FRAMING_LINE_PAD, FRAMING_FILL);
        }
        content.append(sbr.get_data(), sbr.length());

        const auto curr_size = static_cast<file_ssize_t>(content.size());
        if (curr_size < this->lf_stat.st_size) {
            content.push_back('\n');
        }

        ++iter;
    }

    return Ok(std::move(retval));
}
112✔
2040

2041
/**
 * Read the bytes covered by @p fr by delegating to this file's line buffer.
 *
 * @param fr The file range to read.
 * @return Whatever the line buffer's read_range() produces for the range.
 */
Result<shared_buffer_ref, std::string>
logfile::read_range(const file_range& fr)
{
    auto& line_buf = this->lf_line_buffer;

    return line_buf.read_range(fr);
}
2046

2047
void
2048
logfile::read_full_message(const_iterator ll,
35,209✔
2049
                           shared_buffer_ref& msg_out,
2050
                           line_buffer::scan_direction dir,
2051
                           read_format_t format)
2052
{
2053
    require(ll->get_sub_offset() == 0);
35,209✔
2054

2055
#if 0
2056
    log_debug(
2057
        "%s: reading msg at %d", this->lf_filename_as_string.c_str(), ll->get_offset());
2058
#endif
2059

2060
    msg_out.disown();
35,209✔
2061
    auto mlr = this->message_byte_length(ll);
35,209✔
2062
    auto range_for_line
2063
        = file_range{ll->get_offset(), mlr.mlr_length, mlr.mlr_metadata};
35,209✔
2064
    try {
2065
        if (range_for_line.fr_size > line_buffer::MAX_LINE_BUFFER_SIZE) {
35,209✔
UNCOV
2066
            range_for_line.fr_size = line_buffer::MAX_LINE_BUFFER_SIZE;
×
2067
        }
2068
        if (format == read_format_t::plain && mlr.mlr_line_count > 1
35,209✔
2069
            && this->lf_line_buffer.is_piper())
70,418✔
2070
        {
2071
            this->lf_plain_msg_shared.invalidate_refs();
37✔
2072
            this->lf_plain_msg_buffer.expand_to(mlr.mlr_length);
37✔
2073
            this->lf_plain_msg_buffer.clear();
37✔
2074
            auto curr_ll = ll;
37✔
2075
            do {
2076
                const auto curr_range = this->get_file_range(curr_ll, false);
47✔
2077
                auto read_result
2078
                    = this->lf_line_buffer.read_range(curr_range, dir);
47✔
2079

2080
                if (curr_ll != ll) {
47✔
2081
                    this->lf_plain_msg_buffer.push_back('\n');
10✔
2082
                }
2083
                if (read_result.isErr()) {
47✔
2084
                    auto errmsg = read_result.unwrapErr();
×
UNCOV
2085
                    log_error("%s:%zu:unable to read range %lld:%lld -- %s",
×
2086
                              this->get_unique_path().c_str(),
2087
                              std::distance(this->cbegin(), ll),
2088
                              range_for_line.fr_offset,
2089
                              range_for_line.fr_size,
2090
                              errmsg.c_str());
UNCOV
2091
                    return;
×
2092
                }
2093

2094
                auto curr_buf = read_result.unwrap();
47✔
2095
                this->lf_plain_msg_buffer.append(curr_buf.to_string_view());
47✔
2096

2097
                ++curr_ll;
47✔
2098
            } while (curr_ll != this->end() && curr_ll->is_continued()
140✔
2099
                     && curr_ll->get_sub_offset() == 0);
93✔
2100
            msg_out.share(this->lf_plain_msg_shared,
74✔
2101
                          this->lf_plain_msg_buffer.data(),
37✔
2102
                          this->lf_plain_msg_buffer.size());
2103
        } else {
2104
            auto read_result
2105
                = this->lf_line_buffer.read_range(range_for_line, dir);
35,172✔
2106

2107
            if (read_result.isErr()) {
35,172✔
2108
                auto errmsg = read_result.unwrapErr();
×
UNCOV
2109
                log_error("%s:%zu:unable to read range %lld:%lld -- %s",
×
2110
                          this->get_unique_path().c_str(),
2111
                          std::distance(this->cbegin(), ll),
2112
                          range_for_line.fr_offset,
2113
                          range_for_line.fr_size,
2114
                          errmsg.c_str());
UNCOV
2115
                return;
×
2116
            }
2117
            msg_out = read_result.unwrap();
35,172✔
2118
            msg_out.get_metadata() = range_for_line.fr_metadata;
35,172✔
2119
        }
35,172✔
2120
        if (this->lf_format.get() != nullptr) {
35,209✔
2121
            this->lf_format->get_subline(
70,418✔
2122
                {this->lf_value_stats, this->lf_pattern_locks},
35,209✔
2123
                *ll,
35,209✔
2124
                msg_out,
2125
                {true});
2126
        }
2127
    } catch (const line_buffer::error& e) {
×
2128
        log_error("failed to read line");
×
UNCOV
2129
    }
×
2130
}
2131

2132
void
2133
logfile::set_logline_observer(logline_observer* llo)
1,759✔
2134
{
2135
    this->lf_logline_observer = llo;
1,759✔
2136
    if (llo != nullptr) {
1,759✔
2137
        this->reobserve_from(this->begin());
1,139✔
2138
    }
2139
}
1,759✔
2140

2141
void
2142
logfile::reobserve_from(iterator iter)
1,260✔
2143
{
2144
    for (; iter != this->end(); ++iter) {
2,385✔
2145
        off_t offset = std::distance(this->begin(), iter);
1,125✔
2146

2147
        if (iter->get_sub_offset() > 0) {
1,125✔
2148
            continue;
135✔
2149
        }
2150

2151
        if (this->lf_logfile_observer != nullptr) {
990✔
2152
            auto indexing_res = this->lf_logfile_observer->logfile_indexing(
990✔
2153
                this, offset, this->size());
990✔
2154
            if (indexing_res == lnav::progress_result_t::interrupt) {
990✔
UNCOV
2155
                break;
×
2156
            }
2157
        }
2158

2159
        this->read_line(iter).then([this, iter](auto sbr) {
990✔
2160
            auto iter_end = iter + 1;
990✔
2161

2162
            while (iter_end != this->end() && iter_end->get_sub_offset() != 0) {
1,125✔
2163
                ++iter_end;
135✔
2164
            }
2165
            this->lf_logline_observer->logline_new_lines(
1,980✔
2166
                *this, iter, iter_end, sbr);
990✔
2167
        });
990✔
2168
    }
2169
    if (this->lf_logfile_observer != nullptr) {
1,260✔
2170
        this->lf_logfile_observer->logfile_indexing(
1,260✔
2171
            this, this->size(), this->size());
1,260✔
2172
        this->lf_logline_observer->logline_eof(*this);
1,260✔
2173
    }
2174
}
1,260✔
2175

2176
std::filesystem::path
2177
logfile::get_path() const
1,948✔
2178
{
2179
    return this->lf_filename;
1,948✔
2180
}
2181

2182
/**
 * Look up the statistics collected for a named value.
 *
 * @param name The name of the value, as known to the file's format.
 * @return A pointer into lf_value_stats, or nullptr when the file has no
 *   format or the format has no stats index for `name`.
 */
const logline_value_stats*
logfile::stats_for_value(intern_string_t name) const
{
    if (this->lf_format != nullptr) {
        if (auto stats_index = this->lf_format->stats_index_for_value(name);
            stats_index.has_value())
        {
            return &this->lf_value_stats[stats_index.value()];
        }
    }

    return nullptr;
}
2195

2196
/**
 * Compute the length in bytes of the message that starts at `ll`.
 *
 * The message extends to the next index entry whose offset differs from
 * that of `ll` (and, when `include_continues` is set, that is not marked as
 * continued).  For the last message in the index, the length is measured
 * against lf_upper_bound_size if set, otherwise lf_index_size, and is
 * capped at line_buffer::MAX_LINE_BUFFER_SIZE.
 *
 * @param ll The first line of the message.
 * @param include_continues Whether continued lines are part of the message.
 *   When false, a one-entry cache (lf_next_line_cache) is consulted and
 *   refreshed.
 * @return The length, the number of index entries walked, and the metadata
 *   accumulated from those entries.
 */
logfile::message_length_result
logfile::message_byte_length(logfile::const_iterator ll, bool include_continues)
{
    const auto msg_offset = ll->get_offset();

    // Fast path: the previous single-line query was for this same offset.
    if (!include_continues && this->lf_next_line_cache
        && msg_offset == this->lf_next_line_cache->first)
    {
        return {
            static_cast<file_ssize_t>(this->lf_next_line_cache->second),
            1,
            {ll->is_valid_utf(), ll->has_ansi()},
        };
    }

    auto cursor = ll;
    file_range::metadata meta;
    size_t line_count = 0;

    // Walk forward until an entry that belongs to a different message.
    for (;;) {
        line_count += 1;
        meta.m_has_ansi = meta.m_has_ansi || cursor->has_ansi();
        meta.m_valid_utf = meta.m_valid_utf && cursor->is_valid_utf();
        ++cursor;

        if (cursor == this->end()) {
            break;
        }

        const bool same_offset = (msg_offset == cursor->get_offset());
        if (!same_offset && !(include_continues && cursor->is_continued())) {
            break;
        }
    }

    file_ssize_t retval = 0;

    if (cursor != this->end()) {
        // One byte is left out of the distance to the next message --
        // presumably the line terminator.
        retval = cursor->get_offset() - msg_offset - 1;
        if (!include_continues) {
            this->lf_next_line_cache
                = std::make_optional(std::make_pair(msg_offset, retval));
        }
    } else {
        if (this->lf_upper_bound_size) {
            retval = this->lf_upper_bound_size.value() - msg_offset;
        } else {
            retval = this->lf_index_size - msg_offset;
        }
        if (retval > line_buffer::MAX_LINE_BUFFER_SIZE) {
            retval = line_buffer::MAX_LINE_BUFFER_SIZE;
        }
        // Unless the last line is still partial, drop its final byte too.
        if (retval > 0 && !this->lf_partial_line) {
            retval -= 1;
        }
    }

    require_ge(retval, 0);

    return {retval, line_count, meta};
}
2247

2248
/**
 * Read the bytes of the message starting at `ll` exactly as the line buffer
 * returns them for get_file_range(ll).
 *
 * @param ll The first line of the message; its sub-offset must be zero.
 * @return The result of line_buffer::read_range() for the message's range.
 */
Result<shared_buffer_ref, std::string>
logfile::read_raw_message(logfile::const_iterator ll)
{
    require(ll->get_sub_offset() == 0);

    const auto msg_range = this->get_file_range(ll);

    return this->lf_line_buffer.read_range(msg_range);
}
2255

2256
/**
 * @return The name of the format attached to this file, or a
 *   default-constructed intern_string_t when no format is set.
 */
intern_string_t
logfile::get_format_name() const
{
    if (!this->lf_format) {
        return {};
    }

    return this->lf_format->get_name();
}
2265

2266
std::optional<logfile::const_iterator>
2267
logfile::find_from_time(const timeval& tv) const
2✔
2268
{
2269
    auto retval
2270
        = std::lower_bound(this->lf_index.begin(), this->lf_index.end(), tv);
2✔
2271
    if (retval == this->lf_index.end()) {
2✔
UNCOV
2272
        return std::nullopt;
×
2273
    }
2274

2275
    return retval;
2✔
2276
}
2277

2278
bool
2279
logfile::mark_as_duplicate(const std::string& name)
1✔
2280
{
2281
    safe::WriteAccess<safe_notes> notes(this->lf_notes);
1✔
2282

2283
    if (notes->contains(note_type::duplicate)) {
1✔
UNCOV
2284
        return false;
×
2285
    }
2286

2287
    this->lf_indexing = false;
1✔
2288
    this->lf_options.loo_is_visible = false;
1✔
2289
    auto note_um
2290
        = lnav::console::user_message::warning("hiding duplicate file")
2✔
2291
              .with_reason(
2✔
2292
                  attr_line_t("this file appears to have the same content as ")
2✔
2293
                      .append(lnav::roles::file(name)))
2✔
2294
              .move();
1✔
2295
    notes->insert(note_type::duplicate, note_um);
1✔
2296
    return true;
1✔
2297
}
1✔
2298

2299
void
2300
logfile::adjust_content_time(int line, const timeval& tv, bool abs_offset)
21✔
2301
{
2302
    if (this->lf_time_offset == tv) {
21✔
2303
        return;
8✔
2304
    }
2305

2306
    auto old_time = this->lf_time_offset;
13✔
2307

2308
    this->lf_time_offset_line = line;
13✔
2309
    if (abs_offset) {
13✔
2310
        this->lf_time_offset = tv;
9✔
2311
    } else {
2312
        timeradd(&old_time, &tv, &this->lf_time_offset);
4✔
2313
    }
2314
    for (auto& iter : *this) {
85✔
2315
        timeval curr, diff, new_time;
2316

2317
        curr = iter.get_timeval();
72✔
2318
        timersub(&curr, &old_time, &diff);
72✔
2319
        timeradd(&diff, &this->lf_time_offset, &new_time);
72✔
2320
        iter.set_time(new_time);
72✔
2321
    }
2322
    this->lf_sort_needed = true;
13✔
2323
    this->lf_index_generation += 1;
13✔
2324
}
2325

2326
std::filesystem::path
2327
logfile::get_path_for_key() const
113✔
2328
{
2329
    if (this->lf_options.loo_temp_dev == 0 && this->lf_options.loo_temp_ino == 0
113✔
2330
        && this->lf_line_buffer.is_piper())
226✔
2331
    {
2332
        return this->lf_actual_path.value_or(this->lf_filename);
25✔
2333
    }
2334
    return this->lf_filename;
88✔
2335
}
2336

2337
void
2338
logfile::set_filename(const std::string& filename)
137✔
2339
{
2340
    if (this->lf_filename != filename) {
137✔
2341
        this->lf_filename = filename;
104✔
2342
        this->lf_filename_as_string = this->lf_filename.string();
104✔
2343
        std::filesystem::path p(filename);
104✔
2344
        this->lf_basename = p.filename();
104✔
2345
    }
104✔
2346
}
137✔
2347

2348
time_t
UNCOV
2349
logfile::get_origin_mtime() const
×
2350
{
UNCOV
2351
    if (!this->is_valid_filename()) {
×
2352
        struct stat st;
2353
        if (lnav::filesystem::statp(this->lf_filename, &st) == 0) {
×
UNCOV
2354
            return st.st_mtime;
×
2355
        }
2356
    }
2357

UNCOV
2358
    return this->lf_stat.st_mtime;
×
2359
}
2360

2361
struct timeval
2362
logfile::original_line_time(iterator ll)
172✔
2363
{
2364
    if (this->is_time_adjusted()) {
172✔
2365
        auto line_time = ll->get_timeval();
11✔
2366
        timeval retval;
2367

2368
        timersub(&line_time, &this->lf_time_offset, &retval);
11✔
2369
        return retval;
11✔
2370
    }
2371

2372
    return ll->get_timeval();
161✔
2373
}
2374

2375
std::optional<logfile::const_iterator>
2376
logfile::line_for_offset(file_off_t off) const
5✔
2377
{
2378
    struct cmper {
2379
        bool operator()(const file_off_t& lhs, const logline& rhs) const
2380
        {
2381
            return lhs < rhs.get_offset();
2382
        }
2383

2384
        bool operator()(const logline& lhs, const file_off_t& rhs) const
20✔
2385
        {
2386
            return lhs.get_offset() < rhs;
20✔
2387
        }
2388
    };
2389

2390
    if (this->lf_index.empty()) {
5✔
UNCOV
2391
        return std::nullopt;
×
2392
    }
2393

2394
    auto iter = std::lower_bound(
5✔
2395
        this->lf_index.begin(), this->lf_index.end(), off, cmper{});
2396
    if (iter == this->lf_index.end()) {
5✔
2397
        if (this->lf_index.back().get_offset() <= off
×
UNCOV
2398
            && off < this->lf_index_size)
×
2399
        {
UNCOV
2400
            return std::make_optional(iter);
×
2401
        }
UNCOV
2402
        return std::nullopt;
×
2403
    }
2404

2405
    if (off < iter->get_offset() && iter != this->lf_index.begin()) {
5✔
2406
        --iter;
5✔
2407
    }
2408

2409
    return std::make_optional(iter);
5✔
2410
}
2411

2412
void
2413
logfile::dump_stats()
898✔
2414
{
2415
    const auto buf_stats = this->lf_line_buffer.consume_stats();
898✔
2416

2417
    if (buf_stats.empty()) {
898✔
2418
        return;
610✔
2419
    }
2420
    log_info("line buffer stats for file: %s",
288✔
2421
             this->lf_filename_as_string.c_str());
2422
    log_info("  file_size=%lld", this->lf_line_buffer.get_file_size());
288✔
2423
    log_info("  buffer_size=%ld", this->lf_line_buffer.get_buffer_size());
288✔
2424
    log_info("  read_hist=[%4u %4u %4u %4u %4u %4u %4u %4u %4u %4u]",
288✔
2425
             buf_stats.s_hist[0],
2426
             buf_stats.s_hist[1],
2427
             buf_stats.s_hist[2],
2428
             buf_stats.s_hist[3],
2429
             buf_stats.s_hist[4],
2430
             buf_stats.s_hist[5],
2431
             buf_stats.s_hist[6],
2432
             buf_stats.s_hist[7],
2433
             buf_stats.s_hist[8],
2434
             buf_stats.s_hist[9]);
2435
    log_info("  decompressions=%u", buf_stats.s_decompressions);
288✔
2436
    log_info("  preads=%u", buf_stats.s_preads);
288✔
2437
    log_info("  requested_preloads=%u", buf_stats.s_requested_preloads);
288✔
2438
    log_info("  used_preloads=%u", buf_stats.s_used_preloads);
288✔
2439
}
2440

2441
void
2442
logfile::set_logline_opid(uint32_t line_number, string_fragment opid)
17✔
2443
{
2444
    if (line_number >= this->lf_index.size()) {
17✔
2445
        log_error("invalid line number: %u", line_number);
×
UNCOV
2446
        return;
×
2447
    }
2448

2449
    auto bm_iter = this->lf_bookmark_metadata.find(line_number);
17✔
2450
    if (bm_iter != this->lf_bookmark_metadata.end()) {
17✔
2451
        if (bm_iter->second.bm_opid == opid) {
×
UNCOV
2452
            return;
×
2453
        }
2454
    }
2455

2456
    auto write_opids = this->lf_opids.writeAccess();
17✔
2457

2458
    if (bm_iter != this->lf_bookmark_metadata.end()
17✔
2459
        && !bm_iter->second.bm_opid.empty())
17✔
2460
    {
2461
        auto old_opid_iter = write_opids->los_opid_ranges.find(opid);
×
2462
        if (old_opid_iter != write_opids->los_opid_ranges.end()) {
×
UNCOV
2463
            this->lf_invalidated_opids.insert(old_opid_iter->first);
×
2464
        }
2465
    }
2466

2467
    auto& ll = this->lf_index[line_number];
17✔
2468
    auto log_us = ll.get_time<std::chrono::microseconds>();
17✔
2469
    auto opid_iter = write_opids->insert_op(
17✔
2470
        this->lf_allocator, opid, log_us, timestamp_point_of_reference_t::send);
17✔
2471
    auto& otr = opid_iter->second;
17✔
2472

2473
    otr.otr_level_stats.update_msg_count(ll.get_msg_level());
17✔
2474
    ll.merge_bloom_bits(opid.bloom_bits());
17✔
2475
    this->lf_bookmark_metadata[line_number].bm_opid = opid.to_string();
17✔
2476
}
17✔
2477

2478
void
2479
logfile::set_opid_description(string_fragment opid, string_fragment desc)
4✔
2480
{
2481
    auto opid_guard = this->lf_opids.writeAccess();
4✔
2482

2483
    auto opid_iter = opid_guard->los_opid_ranges.find(opid);
4✔
2484
    if (opid_iter == opid_guard->los_opid_ranges.end()) {
4✔
UNCOV
2485
        return;
×
2486
    }
2487
    opid_iter->second.otr_description.lod_index = std::nullopt;
4✔
2488
    opid_iter->second.otr_description.lod_elements.clear();
4✔
2489
    opid_iter->second.otr_description.lod_elements.insert(0, desc.to_string());
4✔
2490
}
4✔
2491

2492
void
UNCOV
2493
logfile::clear_logline_opid(uint32_t line_number)
×
2494
{
2495
    if (line_number >= this->lf_index.size()) {
×
UNCOV
2496
        return;
×
2497
    }
2498

2499
    auto iter = this->lf_bookmark_metadata.find(line_number);
×
2500
    if (iter == this->lf_bookmark_metadata.end()) {
×
UNCOV
2501
        return;
×
2502
    }
2503

2504
    if (iter->second.bm_opid.empty()) {
×
UNCOV
2505
        return;
×
2506
    }
2507

2508
    auto& ll = this->lf_index[line_number];
×
2509
    auto opid = std::move(iter->second.bm_opid);
×
UNCOV
2510
    auto opid_sf = string_fragment::from_str(opid);
×
2511

2512
    if (iter->second.empty(bookmark_metadata::categories::any)) {
×
UNCOV
2513
        this->lf_bookmark_metadata.erase(iter);
×
2514

UNCOV
2515
        auto writeOpids = this->lf_opids.writeAccess();
×
2516

2517
        auto otr_iter = writeOpids->los_opid_ranges.find(opid_sf);
×
2518
        if (otr_iter == writeOpids->los_opid_ranges.end()) {
×
UNCOV
2519
            return;
×
2520
        }
2521

2522
        if (otr_iter->second.otr_range.tr_begin
×
2523
                != ll.get_time<std::chrono::microseconds>()
×
2524
            && otr_iter->second.otr_range.tr_end
×
UNCOV
2525
                != ll.get_time<std::chrono::microseconds>())
×
2526
        {
UNCOV
2527
            otr_iter->second.otr_level_stats.update_msg_count(
×
2528
                ll.get_msg_level(), -1);
UNCOV
2529
            return;
×
2530
        }
2531

2532
        otr_iter->second.clear();
×
UNCOV
2533
        this->lf_invalidated_opids.insert(opid_sf);
×
2534
    }
2535
}
2536

2537
size_t
2538
logfile::estimated_remaining_lines() const
6,514✔
2539
{
2540
    if (this->lf_index.empty() || this->is_compressed()) {
6,514✔
2541
        return 10;
649✔
2542
    }
2543

2544
    const auto bytes_per_line = this->lf_index_size / this->lf_index.size();
5,865✔
2545
    if (this->lf_index_size > this->lf_stat.st_size) {
5,865✔
2546
        return 0;
14✔
2547
    }
2548
    const auto remaining_bytes = this->lf_stat.st_size - this->lf_index_size;
5,851✔
2549

2550
    return remaining_bytes / bytes_per_line;
5,851✔
2551
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc