• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jtmoon79 / super-speedy-syslog-searcher / 17694315272

13 Sep 2025 08:41AM UTC coverage: 57.903% (-0.3%) from 58.19%
17694315272

push

github

jtmoon79
(LIB) (BIN) NFC rustfmt

389 of 839 new or added lines in 16 files covered. (46.36%)

876 existing lines in 9 files now uncovered.

11942 of 20624 relevant lines covered (57.9%)

22028.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

63.38
/src/readers/syslogprocessor.rs
1
// src/readers/syslogprocessor.rs
2
// …
3

4
//! Implements a [`SyslogProcessor`], the driver of the processing stages for
5
//! a "syslog" file using a [`SyslineReader`].
6
//!
7
//! A "syslog" file in this context means any text-based file with logged
8
//! messages with a datetime stamp.
9
//! The file may use a formally defined log message format (e.g. RFC 5424)
10
//! or an ad-hoc log message format (most log files).<br/>
11
//! The two common assumptions are that:
12
//! 1. each log message has a datetime stamp on the first line
13
//! 2. log messages are in chronological order
14
//!
15
//! Sibling of [`FixedStructReader`]. But far more complicated due to the
16
//! ad-hoc nature of log files.
17
//!
18
//! This is an _s4lib_ structure used by the binary program _s4_.
19
//!
20
//! [`FixedStructReader`]: crate::readers::fixedstructreader::FixedStructReader
21
//! [`SyslineReader`]: crate::readers::syslinereader::SyslineReader
22
//! [`SyslogProcessor`]: SyslogProcessor
23

24
#![allow(non_snake_case)]
25

26
use std::fmt;
27
use std::fmt::Debug;
28
use std::io::{
29
    Error,
30
    ErrorKind,
31
    Result,
32
};
33
use std::time::Duration as StdDuration;
34

35
use ::chrono::Datelike;
36
use ::lazy_static::lazy_static;
37
use ::rangemap::RangeMap;
38
use ::si_trace_print::{
39
    def1n,
40
    def1x,
41
    def1ñ,
42
    defn,
43
    defo,
44
    defx,
45
    defñ,
46
};
47

48
use crate::common::{
49
    Count,
50
    FPath,
51
    FileOffset,
52
    FileProcessingResult,
53
    FileSz,
54
    FileType,
55
    SYSLOG_SZ_MAX,
56
};
57
use crate::data::datetime::{
58
    datetime_minus_systemtime,
59
    dt_after_or_before,
60
    systemtime_to_datetime,
61
    DateTimeL,
62
    DateTimeLOpt,
63
    Duration,
64
    FixedOffset,
65
    Result_Filter_DateTime1,
66
    SystemTime,
67
    Year,
68
    UPTIME_DEFAULT_OFFSET,
69
};
70
use crate::data::sysline::SyslineP;
71
#[cfg(test)]
72
use crate::readers::blockreader::SetDroppedBlocks;
73
use crate::readers::blockreader::{
74
    BlockIndex,
75
    BlockOffset,
76
    BlockP,
77
    BlockSz,
78
    ResultFindReadBlock,
79
};
80
#[doc(hidden)]
81
pub use crate::readers::linereader::ResultFindLine;
82
#[cfg(test)]
83
use crate::readers::linereader::SetDroppedLines;
84
use crate::readers::summary::Summary;
85
#[cfg(test)]
86
use crate::readers::syslinereader::SetDroppedSyslines;
87
#[doc(hidden)]
88
pub use crate::readers::syslinereader::{
89
    DateTimePatternCounts,
90
    ResultFindSysline,
91
    SummarySyslineReader,
92
    SyslineReader,
93
};
94
use crate::{
95
    de_err,
96
    de_wrn,
97
    e_err,
98
};
99

100
// ---------------
101
// SyslogProcessor
102

103
/// `SYSLOG_SZ_MAX` as a `BlockSz`.
104
pub(crate) const SYSLOG_SZ_MAX_BSZ: BlockSz = SYSLOG_SZ_MAX as BlockSz;
105

106
/// Typed [`FileProcessingResult`] for "block zero analysis".
107
///
108
/// [`FileProcessingResult`]: crate::common::FileProcessingResult
109
pub type FileProcessingResultBlockZero = FileProcessingResult<std::io::Error>;
110

111
/// Enum for the [`SyslogProcessor`] processing stages. Each file processed
112
/// advances through these stages. Sometimes stages may be skipped.
113
///
114
/// [`SyslogProcessor`]: self::SyslogProcessor
115
#[derive(Debug, Eq, Ord, PartialEq, PartialOrd)]
116
pub enum ProcessingStage {
117
    /// Does the file exist and is it a parseable type?
118
    Stage0ValidFileCheck,
119
    /// Check file can be parsed by trying to parse it. Determine the
120
    /// datetime patterns of any found [`Sysline`s].<br/>
121
    /// If no `Sysline`s are found then advance to `Stage4Summary`.
122
    ///
123
    /// [`Sysline`s]: crate::data::sysline::Sysline
124
    Stage1BlockzeroAnalysis,
125
    /// Find the first [`Sysline`] in the syslog file.<br/>
126
    /// If passed CLI option `--after` then find the first `Sysline` with
127
    /// datetime at or after the user-passed [`DateTimeL`].
128
    ///
129
    /// [`Sysline`]: crate::data::sysline::Sysline
130
    /// [`DateTimeL`]: crate::data::datetime::DateTimeL
131
    Stage2FindDt,
132
    /// Advance through the syslog file to the end.<br/>
133
    /// If passed CLI option `--before` then process up to
134
    /// the last [`Sysline`] with datetime at or before the user-passed
135
    /// [`DateTimeL`]. Otherwise, process all remaining Syslines.
136
    ///
137
    /// While advancing, try to [`drop`] previously processed data `Block`s,
138
    /// `Line`s, and `Sysline`s to lessen memory allocated.
139
    /// a.k.a. "_streaming stage_".
140
    ///
141
    /// Also see function [`find_sysline`].
142
    ///
143
    /// [`Sysline`]: crate::data::sysline::Sysline
144
    /// [`DateTimeL`]: crate::data::datetime::DateTimeL
145
    /// [`find_sysline`]: self::SyslogProcessor#method.find_sysline
146
    /// [`drop`]: self::SyslogProcessor#method.drop_data_try
147
    Stage3StreamSyslines,
148
    /// If passed CLI option `--summary` then print a summary of
149
    /// various information about the processed file.
150
    Stage4Summary,
151
}
152

153
/// [`BlockSz`] in a [`Range`].
154
///
155
/// [`Range`]: std::ops::Range
156
/// [`BlockSz`]: crate::readers::blockreader::BlockSz
157
type BszRange = std::ops::Range<BlockSz>;
158

159
/// Map [`BlockSz`] to a [`Count`].
160
///
161
/// [`BlockSz`]: crate::readers::blockreader::BlockSz
162
/// [`Count`]: crate::common::Count
163
type MapBszRangeToCount = RangeMap<u64, Count>;
164

165
lazy_static! {
166
    /// For files in `blockzero_analysis`, the number of [`Line`]s needed to
167
    /// be found within block zero.
168
    ///
169
    /// [`Line`]: crate::data::line::Line
170
    pub static ref BLOCKZERO_ANALYSIS_LINE_COUNT_MIN_MAP: MapBszRangeToCount = {
171
        defñ!("lazy_static! BLOCKZERO_ANALYSIS_LINE_COUNT_MIN_MAP::new()");
172

173
        let mut m = MapBszRangeToCount::new();
174
        m.insert(BszRange{start: 0, end: SYSLOG_SZ_MAX_BSZ}, 1);
175
        m.insert(BszRange{start: SYSLOG_SZ_MAX_BSZ, end: SYSLOG_SZ_MAX_BSZ * 3}, 3);
176
        m.insert(BszRange{start: SYSLOG_SZ_MAX_BSZ * 3, end: BlockSz::MAX}, 3);
177

178
        m
179
    };
180

181
    /// For files in `blockzero_analysis`, the number of [`Sysline`]s needed to
182
    /// be found within block zero.
183
    ///
184
    /// [`Sysline`]: crate::data::sysline::Sysline
185
    pub static ref BLOCKZERO_ANALYSIS_SYSLINE_COUNT_MIN_MAP: MapBszRangeToCount = {
186
        defñ!("lazy_static! BLOCKZERO_ANALYSIS_SYSLINE_COUNT_MIN_MAP::new()");
187

188
        let mut m = MapBszRangeToCount::new();
189
        m.insert(BszRange{start: 0, end: SYSLOG_SZ_MAX_BSZ}, 1);
190
        m.insert(BszRange{start: SYSLOG_SZ_MAX_BSZ, end: BlockSz::MAX}, 2);
191

192
        m
193
    };
194

195
    /// 25 hours.
196
    /// For processing syslog files without a year.
197
    /// If there is a datetime jump backwards more than this value then
198
    /// a year rollover happened.
199
    ///
200
    /// e.g. given log messages
201
    ///     Dec 31 23:59:59 [INFO] One!
202
    ///     Jan 1 00:00:00 [INFO] Happy New Year!!!
203
    /// These messages interpreted as the same year would be a jump backwards
204
    /// in time.
205
    /// Of course, this apparent "jump backwards" means the year changed.
206
    // XXX: cannot make `const` because `secs` is a private field
207
    static ref BACKWARDS_TIME_JUMP_MEANS_NEW_YEAR: Duration = Duration::try_seconds(60 * 60 * 25).unwrap();
208
}
209

210
/// The `SyslogProcessor` uses [`SyslineReader`] to find [`Sysline`s] in a file.
211
///
212
/// A `SyslogProcessor` has knowledge of:
213
/// - the different stages of processing a syslog file
214
/// - stores optional datetime filters and searches with them
215
/// - handles special cases of a syslog file with a datetime format without a
216
///   year
217
///
218
/// A `SyslogProcessor` is driven by a thread to fully process one syslog file.
219
///
220
/// During "[streaming stage]", the `SyslogProcessor` will proactively `drop`
221
/// data that has been processed and printed. It does so by calling
222
/// private function [`drop_data_try`] during function [`find_sysline`].
223
///
224
/// A `SyslogProcessor` presumes syslog messages are in chronological order.
225
///
226
/// [`Sysline`s]: crate::data::sysline::Sysline
227
/// [`SyslineReader`]: crate::readers::syslinereader::SyslineReader
228
/// [`LineReader`]: crate::readers::linereader::LineReader
229
/// [`BlockReader`]: crate::readers::blockreader::BlockReader
230
/// [`drop_data_try`]: self::SyslogProcessor#method.drop_data_try
231
/// [`find_sysline`]: self::SyslogProcessor#method.find_sysline
232
/// [streaming stage]: self::ProcessingStage#variant.Stage3StreamSyslines
233
pub struct SyslogProcessor {
234
    syslinereader: SyslineReader,
235
    /// Current `ProcessingStage`.
236
    processingstage: ProcessingStage,
237
    /// `FPath`.
238
    // TODO: remove this, use the `BlockReader` path, (DRY)
239
    path: FPath,
240
    // TODO: remove this, use the `BlockReader` blocksz, (DRY)
241
    blocksz: BlockSz,
242
    /// `FixedOffset` timezone for datetime formats without a timezone.
243
    tz_offset: FixedOffset,
244
    /// Optional filter, syslines _after_ this `DateTimeL`.
245
    filter_dt_after_opt: DateTimeLOpt,
246
    /// Optional filter, syslines _before_ this `DateTimeL`.
247
    filter_dt_before_opt: DateTimeLOpt,
248
    /// Internal sanity check, has `self.blockzero_analysis()` completed?
249
    blockzero_analysis_done: bool,
250
    /// Internal tracking of last `blockoffset` passed to `drop_block`.
251
    drop_block_last: BlockOffset,
252
    /// Optional `Year` value used to start `process_missing_year()`.
253
    /// Only needed for syslog files with datetime format without a year.
254
    missing_year: Option<Year>,
255
    /// The last [`Error`], if any, as a `String`. Set by [`set_error`].
256
    ///
257
    /// Annoyingly, cannot [Clone or Copy `Error`].
258
    ///
259
    /// [`Error`]: std::io::Error
260
    /// [Clone or Copy `Error`]: https://github.com/rust-lang/rust/issues/24135
261
    /// [`set_error`]: self::SyslogProcessor#method.set_error
262
    // TRACKING: https://github.com/rust-lang/rust/issues/24135
263
    error: Option<String>,
264
}
265

266
impl Debug for SyslogProcessor {
267
    fn fmt(
×
268
        &self,
×
269
        f: &mut fmt::Formatter,
×
270
    ) -> fmt::Result {
×
271
        f.debug_struct("SyslogProcessor")
×
272
            .field("Path", &self.path)
×
273
            .field("Processing Stage", &self.processingstage)
×
274
            .field("BlockSz", &self.blocksz)
×
275
            .field("TimeOffset", &self.tz_offset)
×
276
            .field("filter_dt_after_opt", &self.filter_dt_after_opt)
×
277
            .field("filter_dt_before_opt", &self.filter_dt_before_opt)
×
278
            .field("BO Analysis done?", &self.blockzero_analysis_done)
×
279
            .field("filetype", &self.filetype())
×
280
            .field("Reprocessed missing year?", &self.did_process_missing_year())
×
281
            .field("Missing Year", &self.missing_year)
×
282
            .field("Error?", &self.error)
×
283
            .finish()
×
284
    }
×
285
}
286

287
// TODO: [2023/04] remove redundant variable prefix name `syslogprocessor_`
288
#[derive(Clone, Debug, Default, Eq, PartialEq)]
289
pub struct SummarySyslogProcessor {
290
    /// `SyslogProcessor::missing_year`
291
    pub syslogprocessor_missing_year: Option<Year>,
292
}
293

294
impl SyslogProcessor {
295
    /// `SyslogProcessor` has its own minimum requirements for `BlockSz`.
296
    ///
297
    /// Necessary for `blockzero_analysis` functions to have chance at success.
298
    #[doc(hidden)]
299
    #[cfg(any(debug_assertions, test))]
300
    pub const BLOCKSZ_MIN: BlockSz = 0x2;
301

302
    /// Maximum number of datetime patterns for matching the remainder of a syslog file.
303
    const DT_PATTERN_MAX: usize = SyslineReader::DT_PATTERN_MAX;
304

305
    /// `SyslogProcessor` has its own minimum requirements for `BlockSz`.
306
    ///
307
    /// Necessary for `blockzero_analysis` functions to have chance at success.
308
    #[cfg(not(any(debug_assertions, test)))]
309
    pub const BLOCKSZ_MIN: BlockSz = 0x40;
310

311
    /// Minimum number of bytes needed to perform `blockzero_analysis_bytes`.
312
    ///
313
    /// Pretty sure this is smaller than the smallest possible timestamp that
314
    /// can be processed by the `DTPD!` in `DATETIME_PARSE_DATAS`.
315
    /// In other words, a file that only has a datetimestamp followed by an
316
    /// empty log message.
317
    ///
318
    /// It's okay if this is too small as the later processing stages will
319
    /// be certain of any possible datetime patterns.
320
    pub const BLOCKZERO_ANALYSIS_BYTES_MIN: BlockSz = 6;
321

322
    /// If the first number of bytes are zero bytes (NULL bytes) then
323
    /// stop processing the file. It's extremely unlikely this is a syslog
324
    /// file and more likely it's some sort of binary data file.
325
    pub const BLOCKZERO_ANALYSIS_BYTES_NULL_MAX: usize = 128;
326

327
    /// Allow "streaming stage" to drop data?
328
    /// Compile-time "option" to aid manual debugging.
329
    #[doc(hidden)]
330
    const STREAM_STAGE_DROP: bool = true;
331

332
    /// Use LRU caches in underlying components?
333
    ///
334
    /// XXX: For development and testing experiments!
335
    #[doc(hidden)]
336
    const LRU_CACHE_ENABLE: bool = true;
337

338
    /// Create a new `SyslogProcessor`.
339
    ///
340
    /// **NOTE:** should not attempt any block reads here,
341
    /// similar to other `*Readers::new()`
342
    pub fn new(
98✔
343
        path: FPath,
98✔
344
        filetype: FileType,
98✔
345
        blocksz: BlockSz,
98✔
346
        tz_offset: FixedOffset,
98✔
347
        filter_dt_after_opt: DateTimeLOpt,
98✔
348
        filter_dt_before_opt: DateTimeLOpt,
98✔
349
    ) -> Result<SyslogProcessor> {
98✔
350
        def1n!("({:?}, {:?}, {:?}, {:?})", path, filetype, blocksz, tz_offset);
98✔
351
        if blocksz < SyslogProcessor::BLOCKSZ_MIN {
98✔
352
            return Result::Err(
×
353
                Error::new(
×
354
                    ErrorKind::InvalidInput,
×
355
                    format!(
×
356
                        "BlockSz {0} (0x{0:08X}) is too small, SyslogProcessor has BlockSz minimum {1} (0x{1:08X}) file {2:?}",
×
357
                        blocksz, SyslogProcessor::BLOCKSZ_MIN, &path,
×
358
                    )
×
359
                )
×
360
            );
×
361
        }
98✔
362
        let path_ = path.clone();
98✔
363
        let mut slr = match SyslineReader::new(path, filetype, blocksz, tz_offset) {
98✔
364
            Ok(val) => val,
96✔
365
            Err(err) => {
2✔
366
                def1x!();
2✔
367
                return Result::Err(err);
2✔
368
            }
369
        };
370

371
        if !SyslogProcessor::LRU_CACHE_ENABLE {
96✔
372
            slr.LRU_cache_disable();
×
373
            slr.linereader
×
374
                .LRU_cache_disable();
×
375
            slr.linereader
×
376
                .blockreader
×
377
                .LRU_cache_disable();
×
378
        }
96✔
379

380
        def1x!("return Ok(SyslogProcessor)");
96✔
381

382
        Result::Ok(SyslogProcessor {
96✔
383
            syslinereader: slr,
96✔
384
            processingstage: ProcessingStage::Stage0ValidFileCheck,
96✔
385
            path: path_,
96✔
386
            blocksz,
96✔
387
            tz_offset,
96✔
388
            filter_dt_after_opt,
96✔
389
            filter_dt_before_opt,
96✔
390
            blockzero_analysis_done: false,
96✔
391
            drop_block_last: 0,
96✔
392
            missing_year: None,
96✔
393
            error: None,
96✔
394
        })
96✔
395
    }
98✔
396

397
    /// `Count` of [`Line`s] processed.
398
    ///
399
    /// [`Line`s]: crate::data::line::Line
400
    #[inline(always)]
401
    #[allow(dead_code)]
402
    pub fn count_lines(&self) -> Count {
×
403
        self.syslinereader
×
404
            .linereader
×
405
            .count_lines_processed()
×
406
    }
×
407

408
    /// See [`SyslineReader::count_syslines_stored`].
409
    ///
410
    /// [`SyslineReader::count_syslines_stored`]: crate::readers::syslinereader::SyslineReader#method.count_syslines_stored
411
    #[cfg(test)]
412
    pub fn count_syslines_stored(&self) -> Count {
4✔
413
        self.syslinereader.count_syslines_stored()
4✔
414
    }
4✔
415

416
    /// See [`BlockReader::blocksz`].
417
    ///
418
    /// [`BlockReader::blocksz`]: crate::readers::blockreader::BlockReader#method.blocksz
419
    #[inline(always)]
420
    pub const fn blocksz(&self) -> BlockSz {
77✔
421
        self.syslinereader.blocksz()
77✔
422
    }
77✔
423

424
    /// See [`BlockReader::filesz`].
425
    ///
426
    /// [`BlockReader::filesz`]: crate::readers::blockreader::BlockReader#method.filesz
427
    #[inline(always)]
428
    pub const fn filesz(&self) -> FileSz {
80✔
429
        self.syslinereader.filesz()
80✔
430
    }
80✔
431

432
    /// See [`BlockReader::filetype`].
433
    ///
434
    /// [`BlockReader::filetype`]: crate::readers::blockreader::BlockReader#method.filetype
435
    #[inline(always)]
436
    pub const fn filetype(&self) -> FileType {
4✔
437
        self.syslinereader.filetype()
4✔
438
    }
4✔
439

440
    /// See [`BlockReader::path`].
441
    ///
442
    /// [`BlockReader::path`]: crate::readers::blockreader::BlockReader#method.path
443
    #[inline(always)]
444
    #[allow(dead_code)]
445
    pub const fn path(&self) -> &FPath {
4✔
446
        self.syslinereader.path()
4✔
447
    }
4✔
448

449
    /// See [`BlockReader::block_offset_at_file_offset`].
450
    ///
451
    /// [`BlockReader::block_offset_at_file_offset`]: crate::readers::blockreader::BlockReader#method.block_offset_at_file_offset
452
    #[allow(dead_code)]
453
    pub const fn block_offset_at_file_offset(
×
454
        &self,
×
455
        fileoffset: FileOffset,
×
456
    ) -> BlockOffset {
×
457
        self.syslinereader
×
458
            .block_offset_at_file_offset(fileoffset)
×
459
    }
×
460

461
    /// See [`BlockReader::file_offset_at_block_offset`].
462
    ///
463
    /// [`BlockReader::file_offset_at_block_offset`]: crate::readers::blockreader::BlockReader#method.file_offset_at_block_offset
464
    #[allow(dead_code)]
465
    pub const fn file_offset_at_block_offset(
×
466
        &self,
×
467
        blockoffset: BlockOffset,
×
468
    ) -> FileOffset {
×
469
        self.syslinereader
×
470
            .file_offset_at_block_offset(blockoffset)
×
471
    }
×
472

473
    /// See [`BlockReader::file_offset_at_block_offset_index`].
474
    ///
475
    /// [`BlockReader::file_offset_at_block_offset_index`]: crate::readers::blockreader::BlockReader#method.file_offset_at_block_offset_index
476
    #[allow(dead_code)]
477
    pub const fn file_offset_at_block_offset_index(
×
478
        &self,
×
479
        blockoffset: BlockOffset,
×
480
        blockindex: BlockIndex,
×
481
    ) -> FileOffset {
×
482
        self.syslinereader
×
483
            .file_offset_at_block_offset_index(blockoffset, blockindex)
×
484
    }
×
485

486
    /// See [`BlockReader::block_index_at_file_offset`].
487
    ///
488
    /// [`BlockReader::block_index_at_file_offset`]: crate::readers::blockreader::BlockReader#method.block_index_at_file_offset
489
    #[allow(dead_code)]
490
    pub const fn block_index_at_file_offset(
×
491
        &self,
×
492
        fileoffset: FileOffset,
×
493
    ) -> BlockIndex {
×
494
        self.syslinereader
×
495
            .block_index_at_file_offset(fileoffset)
×
496
    }
×
497

498
    /// See [`BlockReader::count_blocks`].
499
    ///
500
    /// [`BlockReader::count_blocks`]: crate::readers::blockreader::BlockReader#method.count_blocks
501
    #[allow(dead_code)]
502
    pub const fn count_blocks(&self) -> Count {
×
503
        self.syslinereader
×
504
            .count_blocks()
×
505
    }
×
506

507
    /// See [`BlockReader::blockoffset_last`].
508
    ///
509
    /// [`BlockReader::blockoffset_last`]: crate::readers::blockreader::BlockReader#method.blockoffset_last
510
    #[allow(dead_code)]
511
    pub const fn blockoffset_last(&self) -> BlockOffset {
×
512
        self.syslinereader
×
513
            .blockoffset_last()
×
514
    }
×
515

516
    /// See [`BlockReader::fileoffset_last`].
517
    ///
518
    /// [`BlockReader::fileoffset_last`]: crate::readers::blockreader::BlockReader#method.fileoffset_last
519
    pub const fn fileoffset_last(&self) -> FileOffset {
5✔
520
        self.syslinereader
5✔
521
            .fileoffset_last()
5✔
522
    }
5✔
523

524
    /// See [`LineReader::charsz`].
525
    ///
526
    /// [`LineReader::charsz`]: crate::readers::linereader::LineReader#method.charsz
527
    #[allow(dead_code)]
528
    pub const fn charsz(&self) -> usize {
5✔
529
        self.syslinereader.charsz()
5✔
530
    }
5✔
531

532
    /// See [`BlockReader::mtime`].
533
    ///
534
    /// [`BlockReader::mtime`]: crate::readers::blockreader::BlockReader#method.mtime
535
    pub fn mtime(&self) -> SystemTime {
4✔
536
        self.syslinereader.mtime()
4✔
537
    }
4✔
538

539
    /// Did this `SyslogProcessor` run `process_missing_year()` ?
540
    fn did_process_missing_year(&self) -> bool {
5✔
541
        self.missing_year.is_some()
5✔
542
    }
5✔
543

544
    /// Did this `SyslogProcessor` run `process_uptime()` ?
545
    fn did_process_uptime(&self) -> bool {
×
546
        self.systemtime_at_uptime_zero().is_some()
×
547
    }
×
548

549
    /// Return `drop_data` value.
550
    pub const fn is_drop_data(&self) -> bool {
38✔
551
        self.syslinereader.is_drop_data()
38✔
552
    }
38✔
553

554
    /// store an `Error` that occurred. For later printing during `--summary`.
555
    // XXX: duplicates `FixedStructReader.set_error`
556
    fn set_error(
×
557
        &mut self,
×
558
        error: &Error,
×
559
    ) {
×
560
        def1ñ!("{:?}", error);
×
561
        let mut error_string: String = error.kind().to_string();
×
562
        error_string.push_str(": ");
×
563
        error_string.push_str(error.kind().to_string().as_str());
×
564
        // print the error but avoid printing the same error more than once
565
        // XXX: This is somewhat a hack as it's possible the same error, with the
566
        //      the same error message, could occur more than once.
567
        //      Considered another way, this function `set_error` may get called
568
        //      too often. The responsibility for calling `set_error` is haphazard.
569
        match &self.error {
×
570
            Some(err_s) => {
×
571
                if err_s != &error_string {
×
572
                    e_err!("{}", error);
×
573
                }
×
574
            }
575
            None => {
×
576
                e_err!("{}", error);
×
577
            }
×
578
        }
579
        if let Some(ref _err) = self.error {
×
580
            de_wrn!("skip overwrite of previous Error {:?} with Error ({:?})", _err, error);
×
581
            return;
×
582
        }
×
583
        self.error = Some(error_string);
×
584
    }
×
585

586
    /// Syslog files wherein the datetime format does not include a year
587
    /// must have special handling.
588
    ///
589
    /// The last [`Sysline`] in the file is presumed to share the same year as
590
    /// the `mtime` (stored by the underlying [`BlockReader`] instance).
591
    /// The entire file is read from end to beginning (in reverse) (unless
592
    /// a `filter_dt_after_opt` is passed that coincides with the found
593
    /// syslines). The year is tracked and updated for each sysline.
594
    /// If there is a jump backwards in time, that is presumed to be a
595
    /// year changeover.
596
    ///
597
    /// For example, given syslog contents
598
    ///
599
    /// ```text
600
    /// Nov 1 12:00:00 hello
601
    /// Dec 1 12:00:00 good morning
602
    /// Jan 1 12:00:00 goodbye
603
    /// ```
604
    ///
605
    /// and file `mtime` that is datetime _January 1 12:00:00 2015_,
606
    /// then the last `Sysline` "Jan 1 12:00:00 goodbye" is presumed to be in
607
    /// year 2015.
608
    /// The preceding `Sysline` "Dec 1 12:00:00 good morning" is then processed.
609
    /// An apparent backwards jump is seen _Jan 1_ to _Dec 1_.
610
    /// From this, it can be concluded the _Dec 1_ refers to a prior year, 2014.
611
    ///
612
    /// Typically, when a datetime filter is passed, a special binary search is
613
    /// done to find the desired syslog line, reducing resource usage. Whereas,
614
    /// files processed here must be read linearly and in their entirety.
615
    /// Or, if `filter_dt_after_opt` is passed then the file is read to the
616
    /// first `sysline.dt()` (datetime) that is
617
    /// `Result_Filter_DateTime1::OccursBefore` the
618
    /// `filter_dt_after_opt`.
619
    ///
620
    /// [`Sysline`]: crate::data::sysline::Sysline
621
    /// [`BlockReader`]: crate::readers::blockreader::BlockReader
622
    /// [`DateTimeL`]: crate::data::datetime::DateTimeL
623
    // BUG: does not revise year guesstimation based on encountering leap date February 29
624
    //      See Issue #245
625
    pub fn process_missing_year(
5✔
626
        &mut self,
5✔
627
        mtime: SystemTime,
5✔
628
        filter_dt_after_opt: &DateTimeLOpt,
5✔
629
    ) -> FileProcessingResultBlockZero {
5✔
630
        defn!("({:?}, {:?})", mtime, filter_dt_after_opt);
5✔
631
        debug_assert!(!self.did_process_missing_year(), "process_missing_year() must only be called once");
5✔
632
        let dt_mtime: DateTimeL = systemtime_to_datetime(&self.tz_offset, &mtime);
5✔
633
        defo!("converted dt_mtime {:?}", dt_mtime);
5✔
634
        let year: Year = dt_mtime.date_naive().year() as Year;
5✔
635
        self.missing_year = Some(year);
5✔
636
        defo!("converted missing_year {:?}", self.missing_year);
5✔
637
        let mut year_opt: Option<Year> = Some(year);
5✔
638
        defo!("year_opt {:?}", year_opt);
5✔
639
        let charsz_fo: FileOffset = self.charsz() as FileOffset;
5✔
640

641
        // The previously stored `Sysline`s have a filler year that is most likely incorrect.
642
        // The underlying `Sysline` instance cannot be updated behind an `Arc`.
643
        // Those syslines must be dropped and the entire file processed again.
644
        // However, underlying `Line` and `Block` are still valid; do not reprocess those.
645
        self.syslinereader
5✔
646
            .clear_syslines();
5✔
647

648
        // read all syslines in reverse
649
        let mut fo_prev: FileOffset = self.fileoffset_last();
5✔
650
        let mut syslinep_prev_opt: Option<SyslineP> = None;
5✔
651
        loop {
652
            let syslinep: SyslineP = match self
16✔
653
                .syslinereader
16✔
654
                .find_sysline_year(fo_prev, &year_opt)
16✔
655
            {
656
                ResultFindSysline::Found((_fo, syslinep)) => {
16✔
657
                    defo!(
16✔
658
                        "Found {} Sysline @[{}, {}] datetime: {:?})",
16✔
659
                        _fo,
660
                        (*syslinep).fileoffset_begin(),
16✔
661
                        (*syslinep).fileoffset_end(),
16✔
662
                        (*syslinep).dt()
16✔
663
                    );
664
                    syslinep
16✔
665
                }
666
                ResultFindSysline::Done => {
×
667
                    defo!("Done, break;");
×
668
                    break;
×
669
                }
670
                ResultFindSysline::Err(err) => {
×
671
                    self.set_error(&err);
×
672
                    defx!("return FileErrIo({:?})", err);
×
673
                    return FileProcessingResultBlockZero::FileErrIoPath(err);
×
674
                }
675
            };
676
            // TODO: [2022/07/27] add fn `syslinereader.find_sysline_year_rev` to hide these char offset
677
            //       details (put them into a struct that is meant to understand these details)
678
            let fo_prev_prev: FileOffset = fo_prev;
16✔
679
            fo_prev = (*syslinep).fileoffset_begin();
16✔
680
            // check if datetime has suddenly jumped backwards.
681
            // if date has jumped backwards, then remove sysline, update the year, and process the file
682
            // from that fileoffset again
683
            match syslinep_prev_opt {
16✔
684
                Some(syslinep_prev) => {
11✔
685
                    // normally `dt_cur` should have a datetime *before or equal* to `dt_prev`
686
                    // but if not, then there was probably a year rollover
687
                    if (*syslinep).dt() > (*syslinep_prev).dt() {
11✔
688
                        let diff: Duration = *(*syslinep).dt() - *(*syslinep_prev).dt();
×
689
                        if diff > *BACKWARDS_TIME_JUMP_MEANS_NEW_YEAR {
×
690
                            year_opt = Some(year_opt.unwrap() - 1);
×
691
                            defo!("year_opt updated {:?}", year_opt);
×
692
                            self.syslinereader
×
693
                                .remove_sysline(fo_prev);
×
694
                            fo_prev = fo_prev_prev;
×
695
                            syslinep_prev_opt = Some(syslinep_prev.clone());
×
696
                            continue;
×
697
                        }
×
698
                    }
11✔
699
                }
700
                None => {}
5✔
701
            }
702
            if fo_prev < charsz_fo {
16✔
703
                defo!("fo_prev {} break;", fo_prev);
3✔
704
                // fileoffset is at the beginning of the file (or, cannot be moved back any more)
705
                break;
3✔
706
            }
13✔
707
            // if user-passed `--dt-after` and the sysline is prior to that filter then
708
            // stop processing
709
            match dt_after_or_before(syslinep.dt(), filter_dt_after_opt) {
13✔
710
                Result_Filter_DateTime1::OccursBefore => {
711
                    defo!("dt_after_or_before({:?},  {:?}) returned OccursBefore; break", syslinep.dt(), filter_dt_after_opt);
2✔
712
                    break;
2✔
713
                }
714
                Result_Filter_DateTime1::OccursAtOrAfter | Result_Filter_DateTime1::Pass => {},
11✔
715
            }
716
            // search for preceding sysline
717
            fo_prev -= charsz_fo;
11✔
718
            if fo_prev >= fo_prev_prev {
11✔
719
                // This will happen in case where the very first line of the file
720
                // holds a sysline with datetime pattern without a year, and that
721
                // sysline datetime pattern is different than all
722
                // proceeding syslines that have a year. (and it should only happen then)
723
                // Elicited by example in Issue #74
724
                de_err!("fo_prev {} ≥ {} fo_prev_prev, expected <; something is wrong", fo_prev, fo_prev_prev);
×
725
                // must break otherwise end up in an infinite loop
726
                break;
×
727
            }
11✔
728
            syslinep_prev_opt = Some(syslinep.clone());
11✔
729
        } // end loop
730
        defx!("return FileOk");
5✔
731

732
        FileProcessingResultBlockZero::FileOk
5✔
733
    }
5✔
734

735
    fn systemtime_at_uptime_zero(&self) -> Option<SystemTime>{
×
736
        self.syslinereader.systemtime_at_uptime_zero
×
737
    }
×
738

739
    pub fn process_uptime(
×
740
        &mut self,
×
741
    ) -> FileProcessingResultBlockZero {
×
742
        defn!();
×
743
        debug_assert!(!self.did_process_uptime(), "did_process_uptime() must only be called once");
×
744

745
        let fo_last = self.fileoffset_last();
×
746
        defo!("find_sysline(fo_last={})", fo_last);
×
747
        let syslinep = match self.find_sysline(fo_last) {
×
748
            ResultFindSysline::Found((_fo, syslinep_)) => {
×
749
                defo!("found sysline at fo_last={} {:?}", fo_last, syslinep_);
×
750

751
                syslinep_
×
752
            }
753
            ResultFindSysline::Done => {
×
754
                defx!("No sysline found");
×
755
                return FileProcessingResultBlockZero::FileErrNoSyslinesFound;
×
756
            }
757
            ResultFindSysline::Err(err) => {
×
758
                defx!("error finding sysline: {:?}", err);
×
759
                return FileProcessingResultBlockZero::FileErrIo(err);
×
760
            }
761
        };
762
        let dt = syslinep.dt();
×
763
        let diff_ = datetime_minus_systemtime(&dt, &UPTIME_DEFAULT_OFFSET);
×
764
        defo!("diff_ from UPTIME_DEFAULT_OFFSET {:?}", diff_);
×
765
        let diff_secs = diff_.num_seconds();
×
766
        defo!("diff_secs {:?}", diff_secs);
×
767
        let mut diff_nanos = diff_.subsec_nanos();
×
768
        defo!("diff_nanos {:?}", diff_nanos);
×
769
        if diff_nanos < 0 {
×
770
            diff_nanos = 0;
×
771
        }
×
NEW
772
        let diffs: StdDuration = StdDuration::new(diff_secs as u64, diff_nanos as u32);
×
773
        defo!("diffs {:?}", diffs);
×
774
        defo!("mtime()");
×
775
        let mtime = self.mtime();
×
NEW
776
        defo!("mtime {:?} (as DateTime {:?})", mtime, systemtime_to_datetime(&self.tz_offset, &mtime));
×
777
        // std::time::Duration is unsigned whereas chrono::Duration is signed.
778
        let st_at_zero = if diff_secs > 0 {
×
779
            defo!("checked_sub({:?})", diffs);
×
780
            match mtime.checked_sub(diffs) {
×
781
                Some(st) => st,
×
782
                None => {
783
                    defx!("failed to calculate systemtime at uptime zero");
×
784
                    return FileProcessingResultBlockZero::FileErrIo(std::io::Error::new(
×
785
                        std::io::ErrorKind::Other,
×
786
                        "failed to calculate systemtime at uptime zero",
×
787
                    ));
×
788
                }
789
            }
790
        } else {
791
            defo!("checked_add({:?})", diffs);
×
792
            match mtime.checked_add(diffs) {
×
793
                Some(st) => st,
×
794
                None => {
795
                    defx!("failed to calculate systemtime at uptime zero");
×
796
                    return FileProcessingResultBlockZero::FileErrIo(std::io::Error::new(
×
797
                        std::io::ErrorKind::Other,
×
798
                        "failed to calculate systemtime at uptime zero",
×
799
                    ));
×
800
                }
801
            }
802
        };
803
        self.syslinereader.systemtime_at_uptime_zero = Some(st_at_zero);
×
804
        defo!("systemtime_at_uptime_zero is  {:?}", self.syslinereader.systemtime_at_uptime_zero);
×
805
        #[cfg(debug_assertions)]
806
        {
807
            let d = systemtime_to_datetime(
×
808
                &self.tz_offset,
×
809
                &st_at_zero,
×
810
            );
811
            defo!("systemtime_at_uptime_zero as DateTime {:?}", d);
×
812
        }
813

814
        // The systemtime at uptime zero has been discovered.
815
        // So clear the lines that previously used the stand-in value for
816
        // `systemtime_at_uptime_zero`.
817
        self.syslinereader.clear_syslines();
×
818
        // The syslines gathered after this point will use the
819
        // correct `systemtime_at_uptime_zero`.
820

821
        defx!("return FileOk");
×
822

823
        FileProcessingResultBlockZero::FileOk
×
824
    }
×
825

826
    /// See [`SyslineReader::is_sysline_last`].
827
    ///
828
    /// [`SyslineReader::is_sysline_last`]: crate::readers::syslinereader::SyslineReader#method.is_sysline_last
829
    pub fn is_sysline_last(
×
830
        &self,
×
831
        syslinep: &SyslineP,
×
832
    ) -> bool {
×
833
        self.syslinereader
×
834
            .is_sysline_last(syslinep)
×
835
    }
×
836

837
    /// Try to `drop` data associated with the [`Block`] at [`BlockOffset`].
838
    /// This includes dropping associated [`Sysline`]s and [`Line`]s.
839
    /// This calls [`SyslineReader::drop_data`].
840
    ///
841
    /// _The caller must know what they are doing!_
842
    ///
843
    /// [`BlockOffset`]: crate::readers::blockreader::BlockOffset
844
    /// [`Sysline`]: crate::data::sysline::Sysline
845
    /// [`Line`]: crate::data::line::Line
846
    /// [`Block`]: crate::readers::blockreader::Block
847
    pub fn drop_data(
14✔
848
        &mut self,
14✔
849
        blockoffset: BlockOffset,
14✔
850
    ) -> bool {
14✔
851
        def1n!("({})", blockoffset);
14✔
852
        self.assert_stage(ProcessingStage::Stage3StreamSyslines);
14✔
853

854
        if !self.is_drop_data() {
14✔
855
            def1x!("return false; is_drop_data() is false");
×
856
            return false;
×
857
        }
14✔
858

859
        // `syslinereader.drop_data` is an expensive function, skip if possible.
860
        if blockoffset == self.drop_block_last {
14✔
861
            def1x!("({}) skip block, return true", blockoffset);
5✔
862
            return false;
5✔
863
        }
9✔
864

865
        if self
9✔
866
            .syslinereader
9✔
867
            .drop_data(blockoffset)
9✔
868
        {
869
            self.drop_block_last = blockoffset;
4✔
870
            def1x!("({}) return true", blockoffset);
4✔
871
            return true;
4✔
872
        }
5✔
873

874
        def1x!("({}) return false", blockoffset);
5✔
875
        false
5✔
876
    }
14✔
877

878
    /// Call [`drop_data`] for the data assocaited with the [`Block`]
879
    /// *preceding* the first block of the passed [`Sysline`].
880
    ///
881
    /// _The caller must know what they are doing!_
882
    ///
883
    /// [`drop_data`]: Self#method.drop_data
884
    /// [`Block`]: crate::readers::blockreader::Block
885
    /// [`Sysline`]: crate::data::sysline::Sysline
886
    pub fn drop_data_try(
24✔
887
        &mut self,
24✔
888
        syslinep: &SyslineP,
24✔
889
    ) -> bool {
24✔
890
        if !SyslogProcessor::STREAM_STAGE_DROP {
24✔
891
            de_wrn!("drop_data_try() called but SyslogProcessor::STREAM_STAGE_DROP is false");
×
892
            return false;
×
893
        }
24✔
894
        if !self.is_drop_data() {
24✔
895
            def1ñ!("is_drop_data() is false; return false");
×
896
            return false;
×
897
        }
24✔
898

899
        let bo_first: BlockOffset = (*syslinep).blockoffset_first();
24✔
900
        if bo_first > 1 {
24✔
901
            def1ñ!();
14✔
902
            return self.drop_data(bo_first - 2);
14✔
903
        }
10✔
904

905
        false
10✔
906
    }
24✔
907

908
    /// Calls [`self.syslinereader.find_sysline(fileoffset)`],
909
    /// and in some cases calls private function `drop_block` to drop
910
    /// previously processed [`Sysline`], [`Line`], and [`Block`s].
911
    ///
912
    /// This is what implements the "streaming" in "[streaming stage]".
913
    ///
914
    /// [`self.syslinereader.find_sysline(fileoffset)`]: crate::readers::syslinereader::SyslineReader#method.find_sysline
915
    /// [`Block`s]: crate::readers::blockreader::Block
916
    /// [`Line`]: crate::data::line::Line
917
    /// [`Sysline`]: crate::data::sysline::Sysline
918
    /// [streaming stage]: crate::readers::syslogprocessor::ProcessingStage#variant.Stage3StreamSyslines
919
    pub fn find_sysline(
88✔
920
        &mut self,
88✔
921
        fileoffset: FileOffset,
88✔
922
    ) -> ResultFindSysline {
88✔
923
        defn!("({})", fileoffset);
88✔
924
        let result: ResultFindSysline = self
88✔
925
            .syslinereader
88✔
926
            .find_sysline(fileoffset);
88✔
927
        match result {
88✔
928
            ResultFindSysline::Found(_) => {}
74✔
929
            ResultFindSysline::Done => {}
14✔
930
            ResultFindSysline::Err(ref err) => {
×
931
                self.set_error(err);
×
932
            }
×
933
        }
934
        defx!();
88✔
935

936
        result
88✔
937
    }
88✔
938

939
    /// Wrapper function for [`SyslineReader::find_sysline_between_datetime_filters`].
940
    /// Keeps a custom copy of any returned `Error` at `self.error`.
941
    ///
942
    /// [`SyslineReader::find_sysline_between_datetime_filters`]: crate::readers::syslinereader::SyslineReader#method.find_sysline_between_datetime_filters
943
    //
944
    // TODO: [2022/06/20] the `find` functions need consistent naming,
945
    //       `find_next`, `find_between`, `find_…` . The current design has
946
    //       the public-facing `find_` functions falling back on potential file-wide binary-search
947
    //       The binary-search only needs to be done during the stage 2. During stage 3, a simpler
948
    //       linear sequential search is more suitable, and more intuitive.
949
    //       More refactoring is in order.
950
    //       Also, a linear search can better detect rollover (i.e. when sysline datetime is missing year).
951
    // TODO: [2023/03/06] add stats tracking in `find` functions for number of
952
    //       "jumps" or bounces or fileoffset changes to confirm big-O
953
    #[inline(always)]
954
    pub fn find_sysline_between_datetime_filters(
5✔
955
        &mut self,
5✔
956
        fileoffset: FileOffset,
5✔
957
    ) -> ResultFindSysline {
5✔
958
        defn!("({})", fileoffset);
5✔
959

960
        let result = match self
5✔
961
            .syslinereader
5✔
962
            .find_sysline_between_datetime_filters(
5✔
963
                fileoffset,
5✔
964
                &self.filter_dt_after_opt,
5✔
965
                &self.filter_dt_before_opt,
5✔
966
            ) {
5✔
967
            ResultFindSysline::Err(err) => {
×
968
                self.set_error(&err);
×
969

970
                ResultFindSysline::Err(err)
×
971
            }
972
            val => val,
5✔
973
        };
974

975
        defx!("({})", fileoffset);
5✔
976

977
        result
5✔
978
    }
5✔
979

980
    /// Wrapper function for a recurring sanity check.
981
    ///
982
    /// Good for checking functions `process_stage…` are called in
983
    /// the correct order.
984
    // XXX: is there a rust-ic way to enforce stage procession behavior
985
    //      at compile-time? It's a fairly simple enumerated type. Could a
986
    //      `match` tree (or something like that) be used?
987
    //      run-time checks of rust enum values seems hacky.
988
    #[inline(always)]
989
    fn assert_stage(
469✔
990
        &self,
469✔
991
        stage_expact: ProcessingStage,
469✔
992
    ) {
469✔
993
        debug_assert_eq!(
469✔
994
            self.processingstage, stage_expact,
995
            "Unexpected Processing Stage {:?}, expected Processing Stage {:?}",
×
996
            self.processingstage, stage_expact,
997
        );
998
    }
469✔
999

1000
    /// Stage 0 does some sanity checks on the file.
1001
    // TODO: this is redundant and has already been performed by functions in
1002
    //       `filepreprocessor` and `BlockReader::new`.
1003
    pub fn process_stage0_valid_file_check(&mut self) -> FileProcessingResultBlockZero {
80✔
1004
        defn!();
80✔
1005
        // sanity check calls are in correct order
1006
        self.assert_stage(ProcessingStage::Stage0ValidFileCheck);
80✔
1007
        self.processingstage = ProcessingStage::Stage0ValidFileCheck;
80✔
1008

1009
        if self.filesz() == 0 {
80✔
1010
            defx!("filesz 0; return {:?}", FileProcessingResultBlockZero::FileErrEmpty);
2✔
1011
            return FileProcessingResultBlockZero::FileErrEmpty;
2✔
1012
        }
78✔
1013
        defx!("return {:?}", FileProcessingResultBlockZero::FileOk);
78✔
1014

1015
        FileProcessingResultBlockZero::FileOk
78✔
1016
    }
80✔
1017

1018
    /// Stage 1: Can [`Line`s] and [`Sysline`s] be parsed from the first block
1019
    /// (block zero)?
1020
    ///
1021
    /// [`Sysline`s]: crate::data::sysline::Sysline
1022
    /// [`Line`s]: crate::data::line::Line
1023
    pub fn process_stage1_blockzero_analysis(&mut self) -> FileProcessingResultBlockZero {
77✔
1024
        defn!();
77✔
1025
        self.assert_stage(ProcessingStage::Stage0ValidFileCheck);
77✔
1026
        self.processingstage = ProcessingStage::Stage1BlockzeroAnalysis;
77✔
1027

1028
        let result: FileProcessingResultBlockZero = self.blockzero_analysis();
77✔
1029
        // stored syslines may be zero if a "partial" `Line` was examined
1030
        // e.g. an incomplete and temporary `Line` instance was examined.
1031
        defo!(
77✔
1032
            "blockzero_analysis() stored syslines {}",
77✔
1033
            self.syslinereader
77✔
1034
                .count_syslines_stored()
77✔
1035
        );
1036
        match result {
77✔
1037
            FileProcessingResult::FileOk => {}
39✔
1038
            // skip further processing if not `FileOk`
1039
            _ => {
1040
                defx!("return {:?}", result);
38✔
1041
                return result;
38✔
1042
            }
1043
        }
1044

1045
        defx!("return {:?}", result);
39✔
1046

1047
        result
39✔
1048
    }
77✔
1049

1050
    /// Stage 2: Given the an optional datetime filter (user-passed
1051
    /// `--dt-after`), can a log message with a datetime after that filter be
1052
    /// found?
1053
    pub fn process_stage2_find_dt(
8✔
1054
        &mut self,
8✔
1055
        filter_dt_after_opt: &DateTimeLOpt,
8✔
1056
    ) -> FileProcessingResultBlockZero {
8✔
1057
        defn!();
8✔
1058
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
8✔
1059
        self.processingstage = ProcessingStage::Stage2FindDt;
8✔
1060

1061
        // datetime formats without a year requires special handling
1062
        if !self.syslinereader.dt_pattern_has_year() &&
8✔
1063
            !self.syslinereader.dt_pattern_uptime()
4✔
1064
        {
1065
            defo!("!dt_pattern_has_year() && !dt_pattern_uptime()");
4✔
1066
            let mtime: SystemTime = self.mtime();
4✔
1067
            match self.process_missing_year(mtime, filter_dt_after_opt) {
4✔
1068
                FileProcessingResultBlockZero::FileOk => {}
4✔
1069
                result => {
×
1070
                    defx!("Bad result {:?}", result);
×
1071
                    return result;
×
1072
                }
1073
            }
1074
        } else if self.syslinereader.dt_pattern_uptime() {
4✔
1075
            defo!("dt_pattern_uptime()");
×
1076
            match self.process_uptime() {
×
1077
                FileProcessingResultBlockZero::FileOk => {}
×
1078
                result => {
×
1079
                    defx!("Bad result {:?}", result);
×
1080
                    return result;
×
1081
                }
1082
            }
1083
        }
4✔
1084

1085
        defx!();
8✔
1086

1087
        FileProcessingResultBlockZero::FileOk
8✔
1088
    }
8✔
1089

1090
    /// Stage 3: during "[streaming]", processed and printed data stored by
1091
    /// underlying "Readers" is proactively dropped
1092
    /// (removed from process memory).
1093
    ///
1094
    /// Also see [`find_sysline`].
1095
    ///
1096
    /// [streaming]: ProcessingStage#variant.Stage3StreamSyslines
1097
    /// [`find_sysline`]: self::SyslogProcessor#method.find_sysline
1098
    pub fn process_stage3_stream_syslines(&mut self) -> FileProcessingResultBlockZero {
4✔
1099
        defñ!();
4✔
1100
        self.assert_stage(ProcessingStage::Stage2FindDt);
4✔
1101
        self.processingstage = ProcessingStage::Stage3StreamSyslines;
4✔
1102

1103
        FileProcessingResultBlockZero::FileOk
4✔
1104
    }
4✔
1105

1106
    /// Stage 4: no more [`Sysline`s] to process. Create and return a
1107
    /// [`Summary`].
1108
    ///
1109
    /// [`Summary`]: crate::readers::summary::Summary
1110
    /// [`Sysline`s]: crate::data::sysline::Sysline
1111
    pub fn process_stage4_summary(&mut self) -> Summary {
1✔
1112
        defñ!();
1✔
1113
        // XXX: this can be called from various stages, no need to assert
1114
        self.processingstage = ProcessingStage::Stage4Summary;
1✔
1115

1116
        self.summary_complete()
1✔
1117
    }
1✔
1118

1119
    /// Review bytes in the first block ("zero block").
1120
    /// If enough `Line` found then return [`FileOk`]
1121
    /// else return [`FileErrNoLinesFound`].
1122
    ///
1123
    /// [`FileOk`]: self::FileProcessingResultBlockZero
1124
    /// [`FileErrNoLinesFound`]: self::FileProcessingResultBlockZero
1125
    pub(super) fn blockzero_analysis_bytes(&mut self) -> FileProcessingResultBlockZero {
77✔
1126
        defn!();
77✔
1127
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
77✔
1128

1129
        let blockp: BlockP = match self
77✔
1130
            .syslinereader
77✔
1131
            .linereader
77✔
1132
            .blockreader
77✔
1133
            .read_block(0)
77✔
1134
        {
1135
            ResultFindReadBlock::Found(blockp_) => blockp_,
77✔
1136
            ResultFindReadBlock::Done => {
×
1137
                defx!("return FileErrEmpty");
×
1138
                return FileProcessingResultBlockZero::FileErrEmpty;
×
1139
            }
1140
            ResultFindReadBlock::Err(err) => {
×
1141
                self.set_error(&err);
×
1142
                defx!("return FileErrIo({:?})", err);
×
1143
                return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1144
            }
1145
        };
1146
        // if the first block is too small then there will not be enough
1147
        // data to parse a `Line` or `Sysline`
1148
        let blocksz0: BlockSz = (*blockp).len() as BlockSz;
77✔
1149
        let require_sz: BlockSz = std::cmp::min(Self::BLOCKZERO_ANALYSIS_BYTES_MIN, self.blocksz());
77✔
1150
        defo!("blocksz0 {} < {} require_sz", blocksz0, require_sz);
77✔
1151
        if blocksz0 < require_sz {
77✔
1152
            defx!("return FileErrTooSmall");
9✔
1153
            return FileProcessingResultBlockZero::FileErrTooSmall;
9✔
1154
        }
68✔
1155
        // if the first `BLOCKZERO_ANALYSIS_BYTES_NULL_MAX` bytes are all
1156
        // zero then this is not a text file and processing should stop.
1157
        if (*blockp).iter().take(Self::BLOCKZERO_ANALYSIS_BYTES_NULL_MAX).all(|&b| b == 0) {
210✔
1158
            defx!("return FileErrNullBytes");
2✔
1159
            return FileProcessingResultBlockZero::FileErrNullBytes;
2✔
1160
        }
66✔
1161

1162
        defx!("return FileOk");
66✔
1163

1164
        FileProcessingResultBlockZero::FileOk
66✔
1165
    }
77✔
1166

1167
    /// Attempt to find a minimum number of [`Line`s] within the first block
1168
    /// (block zero).
1169
    /// If enough `Line` found then return [`FileOk`]
1170
    /// else return [`FileErrNoLinesFound`].
1171
    ///
1172
    /// [`Line`s]: crate::data::line::Line
1173
    /// [`FileOk`]: self::FileProcessingResultBlockZero
1174
    /// [`FileErrNoLinesFound`]: self::FileProcessingResultBlockZero
1175
    pub(super) fn blockzero_analysis_lines(&mut self) -> FileProcessingResultBlockZero {
66✔
1176
        defn!();
66✔
1177
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
66✔
1178

1179
        let blockp: BlockP = match self
66✔
1180
            .syslinereader
66✔
1181
            .linereader
66✔
1182
            .blockreader
66✔
1183
            .read_block(0)
66✔
1184
        {
1185
            ResultFindReadBlock::Found(blockp_) => blockp_,
66✔
1186
            ResultFindReadBlock::Done => {
×
1187
                defx!("return FileErrEmpty");
×
1188
                return FileProcessingResultBlockZero::FileErrEmpty;
×
1189
            }
1190
            ResultFindReadBlock::Err(err) => {
×
1191
                self.set_error(&err);
×
1192
                defx!("return FileErrIo({:?})", err);
×
1193
                return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1194
            }
1195
        };
1196
        let blocksz0: BlockSz = (*blockp).len() as BlockSz;
66✔
1197
        let mut _partial_found = false;
66✔
1198
        let mut fo: FileOffset = 0;
66✔
1199
        // how many lines have been found?
1200
        let mut found: Count = 0;
66✔
1201
        // must find at least this many lines in block zero to be FileOk
1202
        let found_min: Count = *BLOCKZERO_ANALYSIS_LINE_COUNT_MIN_MAP
66✔
1203
            .get(&blocksz0)
66✔
1204
            .unwrap();
66✔
1205
        defx!("block zero blocksz {} found_min {}", blocksz0, found_min);
66✔
1206
        // find `found_min` Lines or whatever can be found within block 0
1207
        while found < found_min {
101✔
1208
            fo = match self
66✔
1209
                .syslinereader
66✔
1210
                .linereader
66✔
1211
                .find_line_in_block(fo)
66✔
1212
            {
1213
                (ResultFindLine::Found((fo_next, _linep)), _) => {
39✔
1214
                    found += 1;
39✔
1215

1216
                    fo_next
39✔
1217
                }
1218
                (ResultFindLine::Done, partial) => {
27✔
1219
                    match partial {
27✔
1220
                        Some(_) => {
27✔
1221
                            found += 1;
27✔
1222
                            _partial_found = true;
27✔
1223
                        }
27✔
1224
                        None => {}
×
1225
                    }
1226
                    break;
27✔
1227
                }
1228
                (ResultFindLine::Err(err), _) => {
×
1229
                    self.set_error(&err);
×
1230
                    defx!("return FileErrIo({:?})", err);
×
1231
                    return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1232
                }
1233
            };
1234
            if 0 != self
39✔
1235
                .syslinereader
39✔
1236
                .linereader
39✔
1237
                .block_offset_at_file_offset(fo)
39✔
1238
            {
1239
                break;
4✔
1240
            }
35✔
1241
        }
1242

1243
        let fpr: FileProcessingResultBlockZero = match found >= found_min {
66✔
1244
            true => FileProcessingResultBlockZero::FileOk,
66✔
1245
            false => FileProcessingResultBlockZero::FileErrNoLinesFound,
×
1246
        };
1247

1248
        defx!("found {} lines, partial_found {}, require {} lines, return {:?}", found, _partial_found, found_min, fpr);
66✔
1249

1250
        fpr
66✔
1251
    }
66✔
1252

1253
    /// Attempt to find a minimum number of [`Sysline`] within the first block.
1254
    /// If enough `Sysline` found then return [`FileOk`]
1255
    /// else return [`FileErrNoSyslinesFound`].
1256
    ///
1257
    /// [`Sysline`]: crate::data::sysline::Sysline
1258
    /// [`FileOk`]: self::FileProcessingResultBlockZero
1259
    /// [`FileErrNoSyslinesFound`]: self::FileProcessingResultBlockZero
1260
    pub(super) fn blockzero_analysis_syslines(&mut self) -> FileProcessingResultBlockZero {
66✔
1261
        defn!();
66✔
1262
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
66✔
1263

1264
        let blockp: BlockP = match self
66✔
1265
            .syslinereader
66✔
1266
            .linereader
66✔
1267
            .blockreader
66✔
1268
            .read_block(0)
66✔
1269
        {
1270
            ResultFindReadBlock::Found(blockp_) => blockp_,
66✔
1271
            ResultFindReadBlock::Done => {
×
1272
                defx!("return FileErrEmpty");
×
1273
                return FileProcessingResultBlockZero::FileErrEmpty;
×
1274
            }
1275
            ResultFindReadBlock::Err(err) => {
×
1276
                self.set_error(&err);
×
1277
                defx!("return FileErrIo({:?})", err);
×
1278
                return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1279
            }
1280
        };
1281
        let blocksz0: BlockSz = (*blockp).len() as BlockSz;
66✔
1282
        let mut fo: FileOffset = 0;
66✔
1283
        // how many syslines have been found?
1284
        let mut found: Count = 0;
66✔
1285
        // must find at least this many syslines in block zero to be FileOk
1286
        let found_min: Count = *BLOCKZERO_ANALYSIS_SYSLINE_COUNT_MIN_MAP
66✔
1287
            .get(&blocksz0)
66✔
1288
            .unwrap();
66✔
1289
        defo!("block zero blocksz {} found_min {:?}", blocksz0, found_min);
66✔
1290

1291
        // find `at_max` Syslines within block zero
1292
        while found < found_min
97✔
1293
            && self.syslinereader.block_offset_at_file_offset(fo) == 0
66✔
1294
        {
1295
            fo = match self
66✔
1296
                .syslinereader
66✔
1297
                .find_sysline_in_block(fo)
66✔
1298
            {
1299
                (ResultFindSysline::Found((fo_next, _slinep)), _) => {
31✔
1300
                    found += 1;
31✔
1301
                    defo!("Found; found {} syslines, fo_next {}", found, fo_next);
31✔
1302

1303
                    fo_next
31✔
1304
                }
1305
                (ResultFindSysline::Done, partial_found) => {
35✔
1306
                    defo!("Done; found {} syslines, partial_found {}", found, partial_found);
35✔
1307
                    if partial_found {
35✔
1308
                        found += 1;
8✔
1309
                    }
27✔
1310
                    break;
35✔
1311
                }
1312
                (ResultFindSysline::Err(err), _) => {
×
1313
                    self.set_error(&err);
×
1314
                    defx!("return FileErrIo({:?})", err);
×
1315
                    return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1316
                }
1317
            };
1318
        }
1319

1320
        if found == 0 {
66✔
1321
            defx!("found {} syslines, require {} syslines, return FileErrNoSyslinesFound", found, found_min);
27✔
1322
            return FileProcessingResultBlockZero::FileErrNoSyslinesFound;
27✔
1323
        }
39✔
1324

1325
        let patt_count_a = self.syslinereader.dt_patterns_counts_in_use();
39✔
1326
        defo!("dt_patterns_counts_in_use {}", patt_count_a);
39✔
1327

1328
        if !self.syslinereader.dt_patterns_analysis() {
39✔
1329
            de_err!("dt_patterns_analysis() failed which is unexpected; return FileErrNoSyslinesFound");
×
1330
            return FileProcessingResultBlockZero::FileErrNoSyslinesFound;
×
1331
        }
39✔
1332

1333
        let _patt_count_b = self.syslinereader.dt_patterns_counts_in_use();
39✔
1334
        debug_assert_eq!(
39✔
1335
            _patt_count_b,
1336
            SyslogProcessor::DT_PATTERN_MAX,
1337
            "expected patterns to be reduced to {}, found {:?}",
×
1338
            SyslogProcessor::DT_PATTERN_MAX,
1339
            _patt_count_b,
1340
        );
1341

1342
        // if more than one `DateTimeParseInstr` was used then the syslines
1343
        // must be reparsed using the one chosen `DateTimeParseInstr`
1344
        if patt_count_a > 1 {
39✔
1345
            defo!("must reprocess all syslines using limited patterns (used {} DateTimeParseInstr; must only use {})!", patt_count_a, 1);
8✔
1346

1347
            self.syslinereader.clear_syslines();
8✔
1348
            // find `at_max` Syslines within block zero
1349
            found = 0;
8✔
1350
            fo = 0;
8✔
1351
            while found < found_min
11✔
1352
                && self.syslinereader.block_offset_at_file_offset(fo) == 0
8✔
1353
            {
1354
                fo = match self
8✔
1355
                    .syslinereader
8✔
1356
                    .find_sysline_in_block(fo)
8✔
1357
                {
1358
                    (ResultFindSysline::Found((fo_next, _slinep)), _) => {
3✔
1359
                        found += 1;
3✔
1360
                        defo!("Found; found {} syslines, fo_next {}", found, fo_next);
3✔
1361

1362
                        fo_next
3✔
1363
                    }
1364
                    (ResultFindSysline::Done, partial_found) => {
5✔
1365
                        defo!("Done; found {} syslines, partial_found {}", found, partial_found);
5✔
1366
                        if partial_found {
5✔
1367
                            found += 1;
5✔
1368
                        }
5✔
1369
                        break;
5✔
1370
                    }
1371
                    (ResultFindSysline::Err(err), _) => {
×
1372
                        self.set_error(&err);
×
1373
                        defx!("return FileErrIo({:?})", err);
×
1374
                        return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1375
                    }
1376
                };
1377
            }
1378
            defo!("done reprocessing.");
8✔
1379
        } else {
1380
            defo!("no reprocess needed ({} DateTimeParseInstr)!", patt_count_a);
31✔
1381
        }
1382

1383
        let fpr: FileProcessingResultBlockZero = match found >= found_min {
39✔
1384
            true => FileProcessingResultBlockZero::FileOk,
39✔
1385
            false => FileProcessingResultBlockZero::FileErrNoSyslinesFound,
×
1386
        };
1387

1388
        // sanity check that only one `DateTimeParseInstr` is in use
1389
        if cfg!(debug_assertions) && self.syslinereader.dt_patterns_counts_in_use() != 1 {
39✔
1390
            de_wrn!(
×
1391
                "dt_patterns_counts_in_use() = {}, expected 1; for {:?}",
×
1392
                self.syslinereader.dt_patterns_counts_in_use(), self.path()
×
1393
            );
×
1394
        }
39✔
1395

1396
        if self.syslinereader.is_streamed_file()
39✔
1397
            && !self.syslinereader.dt_pattern_has_year()
×
1398
        {
1399
            self.syslinereader.linereader.blockreader.disable_drop_data();
×
1400
            debug_assert!(!self.is_drop_data(), "is_drop_data() should be false");
×
1401
        }
39✔
1402

1403
        defx!("found {} syslines, require {} syslines, return {:?}", found, found_min, fpr);
39✔
1404

1405
        fpr
39✔
1406
    }
66✔
1407

1408
    /// Call `self.blockzero_analysis_lines`.
1409
    /// If that passes then call `self.blockzero_analysis_syslines`.
1410
    pub(super) fn blockzero_analysis(&mut self) -> FileProcessingResultBlockZero {
77✔
1411
        defn!();
77✔
1412
        assert!(!self.blockzero_analysis_done, "blockzero_analysis_lines should only be completed once.");
77✔
1413
        self.blockzero_analysis_done = true;
77✔
1414
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
77✔
1415

1416
        if self.syslinereader.filesz() == 0 {
77✔
1417
            defx!("return FileErrEmpty");
×
1418
            return FileProcessingResultBlockZero::FileErrEmpty;
×
1419
        }
77✔
1420

1421
        let result: FileProcessingResultBlockZero = self.blockzero_analysis_bytes();
77✔
1422
        if !result.is_ok() {
77✔
1423
            defx!("syslinereader.blockzero_analysis_bytes() was !is_ok(), return {:?}", result);
11✔
1424
            return result;
11✔
1425
        };
66✔
1426

1427
        let result: FileProcessingResultBlockZero = self.blockzero_analysis_lines();
66✔
1428
        if !result.is_ok() {
66✔
1429
            defx!("syslinereader.blockzero_analysis() was !is_ok(), return {:?}", result);
×
1430
            return result;
×
1431
        };
66✔
1432

1433
        let result: FileProcessingResultBlockZero = self.blockzero_analysis_syslines();
66✔
1434
        defx!("return {:?}", result);
66✔
1435

1436
        result
66✔
1437
    }
77✔
1438

1439
    /// Test helper: return a clone of the `dropped_blocks` set held by the
    /// underlying `blockreader`.
    #[cfg(test)]
    pub(crate) fn dropped_blocks(&self) -> SetDroppedBlocks {
        let blockreader = &self.syslinereader.linereader.blockreader;

        blockreader.dropped_blocks.clone()
    }
3✔
1447

1448
    /// Test-only accessor. Returns a clone of the `dropped_lines` field
    /// of the underlying `linereader`.
    #[cfg(test)]
    pub(crate) fn dropped_lines(&self) -> SetDroppedLines {
        let linereader = &self.syslinereader.linereader;

        linereader.dropped_lines.clone()
    }
3✔
1455

1456
    /// Test-only accessor. Returns a clone of the `dropped_syslines` field
    /// of the underlying `syslinereader`.
    #[cfg(test)]
    pub(crate) fn dropped_syslines(&self) -> SetDroppedSyslines {
        let syslinereader = &self.syslinereader;

        syslinereader.dropped_syslines.clone()
    }
3✔
1462

1463
    pub fn summary(&self) -> SummarySyslogProcessor {
7✔
1464
        let syslogprocessor_missing_year = self.missing_year;
7✔
1465

1466
        SummarySyslogProcessor {
7✔
1467
            syslogprocessor_missing_year,
7✔
1468
        }
7✔
1469
    }
7✔
1470

1471
    /// Return an up-to-date [`Summary`] instance for this `SyslogProcessor`.
1472
    ///
1473
    /// Probably not useful or interesting before
1474
    /// `ProcessingStage::Stage4Summary`.
1475
    ///
1476
    /// [`Summary`]: crate::readers::summary::Summary
1477
    pub fn summary_complete(&self) -> Summary {
4✔
1478
        let path = self.path().clone();
4✔
1479
        let path_ntf = None;
4✔
1480
        let filetype = self.filetype();
4✔
1481
        let logmessagetype = filetype.to_logmessagetype();
4✔
1482
        let summaryblockreader = self.syslinereader.linereader.blockreader.summary();
4✔
1483
        let summarylinereader = self.syslinereader.linereader.summary();
4✔
1484
        let summarysyslinereader = self.syslinereader.summary();
4✔
1485
        let summarysyslogprocessor = self.summary();
4✔
1486
        let error: Option<String> = self.error.clone();
4✔
1487

1488
        Summary::new(
4✔
1489
            path,
4✔
1490
            path_ntf,
4✔
1491
            filetype,
4✔
1492
            logmessagetype,
4✔
1493
            Some(summaryblockreader),
4✔
1494
            Some(summarylinereader),
4✔
1495
            Some(summarysyslinereader),
4✔
1496
            Some(summarysyslogprocessor),
4✔
1497
            None,
4✔
1498
            None,
4✔
1499
            None,
4✔
1500
            error,
4✔
1501
        )
1502
    }
4✔
1503
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc