• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jtmoon79 / super-speedy-syslog-searcher / 17195373054

24 Aug 2025 11:49PM UTC coverage: 58.19% (-0.3%) from 58.48%
17195373054

push

github

jtmoon79
(TOOLS) compare-log-mergers.sh hyperfine -i

12075 of 20751 relevant lines covered (58.19%)

21914.14 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

63.22
/src/readers/syslogprocessor.rs
1
// src/readers/syslogprocessor.rs
2
// …
3

4
//! Implements a [`SyslogProcessor`], the driver of the processing stages for
5
//! a "syslog" file using a [`SyslineReader`].
6
//!
7
//! A "syslog" file in this context means any text-based file with logged
8
//! messages with a datetime stamp.
9
//! The file may use a formally defined log message format (e.g. RFC 5424)
10
//! or an ad-hoc log message format (most log files).<br/>
11
//! The two common assumptions are that:
12
//! 1. each log message has a datetime stamp on the first line
13
//! 2. log messages are in chronological order
14
//!
15
//! Sibling of [`FixedStructReader`]. But far more complicated due to the
16
//! ad-hoc nature of log files.
17
//! 
18
//! This is an _s4lib_ structure used by the binary program _s4_.
19
//!
20
//! [`FixedStructReader`]: crate::readers::fixedstructreader::FixedStructReader
21
//! [`SyslineReader`]: crate::readers::syslinereader::SyslineReader
22
//! [`SyslogProcessor`]: SyslogProcessor
23

24
#![allow(non_snake_case)]
25

26
use crate::common::{
27
    Count,
28
    FPath,
29
    FileOffset,
30
    FileProcessingResult,
31
    FileSz,
32
    FileType,
33
    SYSLOG_SZ_MAX,
34
};
35
use crate::data::datetime::{
36
    dt_after_or_before,
37
    systemtime_to_datetime,
38
    datetime_minus_systemtime,
39
    DateTimeL,
40
    DateTimeLOpt,
41
    Duration,
42
    FixedOffset,
43
    Result_Filter_DateTime1,
44
    SystemTime,
45
    Year,
46
    UPTIME_DEFAULT_OFFSET,
47
};
48
use crate::data::sysline::SyslineP;
49
use crate::{e_err, de_err, de_wrn};
50
use crate::readers::blockreader::{
51
    BlockIndex,
52
    BlockOffset,
53
    BlockP,
54
    BlockSz,
55
    ResultS3ReadBlock,
56
};
57
#[cfg(test)]
58
use crate::readers::blockreader::SetDroppedBlocks;
59
#[cfg(test)]
60
use crate::readers::linereader::SetDroppedLines;
61
#[cfg(test)]
62
use crate::readers::syslinereader::SetDroppedSyslines;
63
#[doc(hidden)]
64
pub use crate::readers::linereader::ResultS3LineFind;
65
#[doc(hidden)]
66
pub use crate::readers::syslinereader::{
67
    DateTimePatternCounts,
68
    ResultS3SyslineFind,
69
    SummarySyslineReader,
70
    SyslineReader,
71
};
72
use crate::readers::summary::Summary;
73

74
use std::fmt;
75
use std::fmt::Debug;
76
use std::io::{Error, ErrorKind, Result};
77
use std::time::Duration as StdDuration;
78

79
use ::chrono::Datelike;
80
use ::lazy_static::lazy_static;
81
use ::rangemap::RangeMap;
82
use ::si_trace_print::{def1n, def1x, def1ñ, defn, defo, defx, defñ};
83

84

85
// ---------------
86
// SyslogProcessor
87

88
/// `SYSLOG_SZ_MAX` as a `BlockSz`.
89
pub(crate) const SYSLOG_SZ_MAX_BSZ: BlockSz = SYSLOG_SZ_MAX as BlockSz;
90

91
/// Typed [`FileProcessingResult`] for "block zero analysis".
92
///
93
/// [`FileProcessingResult`]: crate::common::FileProcessingResult
94
pub type FileProcessingResultBlockZero = FileProcessingResult<std::io::Error>;
95

96
/// Enum for the [`SyslogProcessor`] processing stages. Each file processed
97
/// advances through these stages. Sometimes stages may be skipped.
98
///
99
/// [`SyslogProcessor`]: self::SyslogProcessor
100
#[derive(Debug, Eq, Ord, PartialEq, PartialOrd)]
101
pub enum ProcessingStage {
102
    /// Does the file exist and is it a parseable type?
103
    Stage0ValidFileCheck,
104
    /// Check file can be parsed by trying to parse it. Determine the
105
    /// datetime patterns of any found [`Sysline`s].<br/>
106
    /// If no `Sysline`s are found then advance to `Stage4Summary`.
107
    ///
108
    /// [`Sysline`s]: crate::data::sysline::Sysline
109
    Stage1BlockzeroAnalysis,
110
    /// Find the first [`Sysline`] in the syslog file.<br/>
111
    /// If passed CLI option `--after` then find the first `Sysline` with
112
    /// datetime at or after the user-passed [`DateTimeL`].
113
    ///
114
    /// [`Sysline`]: crate::data::sysline::Sysline
115
    /// [`DateTimeL`]: crate::data::datetime::DateTimeL
116
    Stage2FindDt,
117
    /// Advance through the syslog file to the end.<br/>
118
    /// If passed CLI option `--before` then process up to 
119
    /// the last [`Sysline`] with datetime at or before the user-passed
120
    /// [`DateTimeL`]. Otherwise, process all remaining Syslines.
121
    ///
122
    /// While advancing, try to [`drop`] previously processed data `Block`s,
123
    /// `Line`s, and `Sysline`s to lessen memory allocated.
124
    /// a.k.a. "_streaming stage_".
125
    ///
126
    /// Also see function [`find_sysline`].
127
    ///
128
    /// [`Sysline`]: crate::data::sysline::Sysline
129
    /// [`DateTimeL`]: crate::data::datetime::DateTimeL
130
    /// [`find_sysline`]: self::SyslogProcessor#method.find_sysline
131
    /// [`drop`]: self::SyslogProcessor#method.drop_data_try
132
    Stage3StreamSyslines,
133
    /// If passed CLI option `--summary` then print a summary of
134
    /// various information about the processed file.
135
    Stage4Summary,
136
}
137

138
/// [`BlockSz`] in a [`Range`].
139
///
140
/// [`Range`]: std::ops::Range
141
/// [`BlockSz`]: crate::readers::blockreader::BlockSz
142
type BszRange = std::ops::Range<BlockSz>;
143

144
/// Map [`BlockSz`] to a [`Count`].
145
///
146
/// [`BlockSz`]: crate::readers::blockreader::BlockSz
147
/// [`Count`]: crate::common::Count
148
type MapBszRangeToCount = RangeMap<u64, Count>;
149

150
lazy_static! {
151
    /// For files in `blockzero_analysis`, the number of [`Line`]s needed to
152
    /// be found within block zero.
153
    ///
154
    /// [`Line`]: crate::data::line::Line
155
    pub static ref BLOCKZERO_ANALYSIS_LINE_COUNT_MIN_MAP: MapBszRangeToCount = {
156
        defñ!("lazy_static! BLOCKZERO_ANALYSIS_LINE_COUNT_MIN_MAP::new()");
157

158
        let mut m = MapBszRangeToCount::new();
159
        m.insert(BszRange{start: 0, end: SYSLOG_SZ_MAX_BSZ}, 1);
160
        m.insert(BszRange{start: SYSLOG_SZ_MAX_BSZ, end: SYSLOG_SZ_MAX_BSZ * 3}, 3);
161
        m.insert(BszRange{start: SYSLOG_SZ_MAX_BSZ * 3, end: BlockSz::MAX}, 3);
162

163
        m
164
    };
165

166
    /// For files in `blockzero_analysis`, the number of [`Sysline`]s needed to
167
    /// be found within block zero.
168
    ///
169
    /// [`Sysline`]: crate::data::sysline::Sysline
170
    pub static ref BLOCKZERO_ANALYSIS_SYSLINE_COUNT_MIN_MAP: MapBszRangeToCount = {
171
        defñ!("lazy_static! BLOCKZERO_ANALYSIS_SYSLINE_COUNT_MIN_MAP::new()");
172

173
        let mut m = MapBszRangeToCount::new();
174
        m.insert(BszRange{start: 0, end: SYSLOG_SZ_MAX_BSZ}, 1);
175
        m.insert(BszRange{start: SYSLOG_SZ_MAX_BSZ, end: BlockSz::MAX}, 2);
176

177
        m
178
    };
179

180
    /// 25 hours.
181
    /// For processing syslog files without a year.
182
    /// If there is a datetime jump backwards more than this value then
183
    /// a year rollover happened.
184
    ///
185
    /// e.g. given log messages
186
    ///     Dec 31 23:59:59 [INFO] One!
187
    ///     Jan 1 00:00:00 [INFO] Happy New Year!!!
188
    /// These messages interpreted as the same year would be a jump backwards
189
    /// in time.
190
    /// Of course, this apparent "jump backwards" means the year changed.
191
    // XXX: cannot make `const` because `secs` is a private field
192
    static ref BACKWARDS_TIME_JUMP_MEANS_NEW_YEAR: Duration = Duration::try_seconds(60 * 60 * 25).unwrap();
193
}
194

195
/// The `SyslogProcessor` uses [`SyslineReader`] to find [`Sysline`s] in a file.
196
///
197
/// A `SyslogProcessor` has knowledge of:
198
/// - the different stages of processing a syslog file
199
/// - stores optional datetime filters and searches with them
200
/// - handles special cases of a syslog file with a datetime format without a
201
///   year
202
///
203
/// A `SyslogProcessor` is driven by a thread to fully process one syslog file.
204
///
205
/// During "[streaming stage]", the `SyslogProcessor` will proactively `drop`
206
/// data that has been processed and printed. It does so by calling
207
/// private function [`drop_data_try`] during function [`find_sysline`].
208
///
209
/// A `SyslogProcessor` presumes syslog messages are in chronological order.
210
///
211
/// [`Sysline`s]: crate::data::sysline::Sysline
212
/// [`SyslineReader`]: crate::readers::syslinereader::SyslineReader
213
/// [`LineReader`]: crate::readers::linereader::LineReader
214
/// [`BlockReader`]: crate::readers::blockreader::BlockReader
215
/// [`drop_data_try`]: self::SyslogProcessor#method.drop_data_try
216
/// [`find_sysline`]: self::SyslogProcessor#method.find_sysline
217
/// [streaming stage]: self::ProcessingStage#variant.Stage3StreamSyslines
218
pub struct SyslogProcessor {
219
    syslinereader: SyslineReader,
220
    /// Current `ProcessingStage`.
221
    processingstage: ProcessingStage,
222
    /// `FPath`.
223
    // TODO: remove this, use the `BlockReader` path, (DRY)
224
    path: FPath,
225
    // TODO: remove this, use the `BlockReader` blocksz, (DRY)
226
    blocksz: BlockSz,
227
    /// `FixedOffset` timezone for datetime formats without a timezone.
228
    tz_offset: FixedOffset,
229
    /// Optional filter, syslines _after_ this `DateTimeL`.
230
    filter_dt_after_opt: DateTimeLOpt,
231
    /// Optional filter, syslines _before_ this `DateTimeL`.
232
    filter_dt_before_opt: DateTimeLOpt,
233
    /// Internal sanity check, has `self.blockzero_analysis()` completed?
234
    blockzero_analysis_done: bool,
235
    /// Internal tracking of last `blockoffset` passed to `drop_block`.
236
    drop_block_last: BlockOffset,
237
    /// Optional `Year` value used to start `process_missing_year()`.
238
    /// Only needed for syslog files with datetime format without a year.
239
    missing_year: Option<Year>,
240
    /// The last [`Error`], if any, as a `String`. Set by [`set_error`].
241
    ///
242
    /// Annoyingly, cannot [Clone or Copy `Error`].
243
    ///
244
    /// [`Error`]: std::io::Error
245
    /// [Clone or Copy `Error`]: https://github.com/rust-lang/rust/issues/24135
246
    /// [`set_error`]: self::SyslogProcessor#method.set_error
247
    // TRACKING: https://github.com/rust-lang/rust/issues/24135
248
    error: Option<String>,
249
}
250

251
impl Debug for SyslogProcessor {
252
    fn fmt(
×
253
        &self,
×
254
        f: &mut fmt::Formatter,
×
255
    ) -> fmt::Result {
×
256
        f.debug_struct("SyslogProcessor")
×
257
            .field("Path", &self.path)
×
258
            .field("Processing Stage", &self.processingstage)
×
259
            .field("BlockSz", &self.blocksz)
×
260
            .field("TimeOffset", &self.tz_offset)
×
261
            .field("filter_dt_after_opt", &self.filter_dt_after_opt)
×
262
            .field("filter_dt_before_opt", &self.filter_dt_before_opt)
×
263
            .field("BO Analysis done?", &self.blockzero_analysis_done)
×
264
            .field("filetype", &self.filetype())
×
265
            .field("Reprocessed missing year?", &self.did_process_missing_year())
×
266
            .field("Missing Year", &self.missing_year)
×
267
            .field("Error?", &self.error)
×
268
            .finish()
×
269
    }
×
270
}
271

272
// TODO: [2023/04] remove redundant variable prefix name `syslogprocessor_`
273
#[derive(Clone, Debug, Default, Eq, PartialEq)]
274
pub struct SummarySyslogProcessor {
275
    /// `SyslogProcessor::missing_year`
276
    pub syslogprocessor_missing_year: Option<Year>,
277
}
278

279
impl SyslogProcessor {
280
    /// `SyslogProcessor` has its own minimum requirements for `BlockSz`.
281
    ///
282
    /// Necessary for `blockzero_analysis` functions to have chance at success.
283
    #[doc(hidden)]
284
    #[cfg(any(debug_assertions, test))]
285
    pub const BLOCKSZ_MIN: BlockSz = 0x2;
286

287
    /// Maximum number of datetime patterns for matching the remainder of a syslog file.
288
    const DT_PATTERN_MAX: usize = SyslineReader::DT_PATTERN_MAX;
289

290
    /// `SyslogProcessor` has its own minimum requirements for `BlockSz`.
291
    ///
292
    /// Necessary for `blockzero_analysis` functions to have chance at success.
293
    #[cfg(not(any(debug_assertions, test)))]
294
    pub const BLOCKSZ_MIN: BlockSz = 0x40;
295

296
    /// Minimum number of bytes needed to perform `blockzero_analysis_bytes`.
297
    ///
298
    /// Pretty sure this is smaller than the smallest possible timestamp that
299
    /// can be processed by the `DTPD!` in `DATETIME_PARSE_DATAS`.
300
    /// In other words, a file that only has a datetimestamp followed by an
301
    /// empty log message.
302
    ///
303
    /// It's okay if this is too small as the later processing stages will
304
    /// be certain of any possible datetime patterns.
305
    pub const BLOCKZERO_ANALYSIS_BYTES_MIN: BlockSz = 6;
306

307
    /// If the first number of bytes are zero bytes (NULL bytes) then
308
    /// stop processing the file. It's extremely unlikely this is a syslog
309
    /// file and more likely it's some sort of binary data file.
310
    pub const BLOCKZERO_ANALYSIS_BYTES_NULL_MAX: usize = 128;
311

312
    /// Allow "streaming stage" to drop data?
313
    /// Compile-time "option" to aid manual debugging.
314
    #[doc(hidden)]
315
    const STREAM_STAGE_DROP: bool = true;
316

317
    /// Use LRU caches in underlying components?
318
    ///
319
    /// XXX: For development and testing experiments!
320
    #[doc(hidden)]
321
    const LRU_CACHE_ENABLE: bool = true;
322

323
    /// Create a new `SyslogProcessor`.
324
    ///
325
    /// **NOTE:** should not attempt any block reads here,
326
    /// similar to other `*Readers::new()`
327
    pub fn new(
98✔
328
        path: FPath,
98✔
329
        filetype: FileType,
98✔
330
        blocksz: BlockSz,
98✔
331
        tz_offset: FixedOffset,
98✔
332
        filter_dt_after_opt: DateTimeLOpt,
98✔
333
        filter_dt_before_opt: DateTimeLOpt,
98✔
334
    ) -> Result<SyslogProcessor> {
98✔
335
        def1n!("({:?}, {:?}, {:?}, {:?})", path, filetype, blocksz, tz_offset);
98✔
336
        if blocksz < SyslogProcessor::BLOCKSZ_MIN {
98✔
337
            return Result::Err(
×
338
                Error::new(
×
339
                    ErrorKind::InvalidInput,
×
340
                    format!(
×
341
                        "BlockSz {0} (0x{0:08X}) is too small, SyslogProcessor has BlockSz minimum {1} (0x{1:08X}) file {2:?}",
×
342
                        blocksz, SyslogProcessor::BLOCKSZ_MIN, &path,
×
343
                    )
×
344
                )
×
345
            );
×
346
        }
98✔
347
        let path_ = path.clone();
98✔
348
        let mut slr = match SyslineReader::new(path, filetype, blocksz, tz_offset) {
98✔
349
            Ok(val) => val,
96✔
350
            Err(err) => {
2✔
351
                def1x!();
2✔
352
                return Result::Err(err);
2✔
353
            }
354
        };
355

356
        if !SyslogProcessor::LRU_CACHE_ENABLE {
96✔
357
            slr.LRU_cache_disable();
×
358
            slr.linereader
×
359
                .LRU_cache_disable();
×
360
            slr.linereader
×
361
                .blockreader
×
362
                .LRU_cache_disable();
×
363
        }
96✔
364

365
        def1x!("return Ok(SyslogProcessor)");
96✔
366

367
        Result::Ok(
96✔
368
            SyslogProcessor {
96✔
369
                syslinereader: slr,
96✔
370
                processingstage: ProcessingStage::Stage0ValidFileCheck,
96✔
371
                path: path_,
96✔
372
                blocksz,
96✔
373
                tz_offset,
96✔
374
                filter_dt_after_opt,
96✔
375
                filter_dt_before_opt,
96✔
376
                blockzero_analysis_done: false,
96✔
377
                drop_block_last: 0,
96✔
378
                missing_year: None,
96✔
379
                error: None,
96✔
380
            }
96✔
381
        )
96✔
382
    }
98✔
383

384
    /// `Count` of [`Line`s] processed.
385
    ///
386
    /// [`Line`s]: crate::data::line::Line
387
    #[inline(always)]
388
    #[allow(dead_code)]
389
    pub fn count_lines(&self) -> Count {
×
390
        self.syslinereader
×
391
            .linereader
×
392
            .count_lines_processed()
×
393
    }
×
394

395
    /// `Count` of syslines stored, as reported by the underlying
    /// `SyslineReader`. Only compiled for tests.
    ///
    /// See [`Sysline::count_syslines_stored`].
    ///
    /// [`Sysline::count_syslines_stored`]: crate::data::sysline::Sysline::count_syslines_stored
    #[cfg(test)]
    pub fn count_syslines_stored(&self) -> Count {
        let reader = &self.syslinereader;

        reader.count_syslines_stored()
    }
402

403
    /// See [`BlockReader::blocksz`].
404
    ///
405
    /// [`BlockReader::blocksz`]: crate::readers::blockreader::BlockReader#method.blocksz
406
    #[inline(always)]
407
    pub const fn blocksz(&self) -> BlockSz {
77✔
408
        self.syslinereader.blocksz()
77✔
409
    }
77✔
410

411
    /// See [`BlockReader::filesz`].
412
    ///
413
    /// [`BlockReader::filesz`]: crate::readers::blockreader::BlockReader#method.filesz
414
    #[inline(always)]
415
    pub const fn filesz(&self) -> FileSz {
80✔
416
        self.syslinereader.filesz()
80✔
417
    }
80✔
418

419
    /// See [`BlockReader::filetype`].
420
    ///
421
    /// [`BlockReader::filetype`]: crate::readers::blockreader::BlockReader#method.filetype
422
    #[inline(always)]
423
    pub const fn filetype(&self) -> FileType {
4✔
424
        self.syslinereader.filetype()
4✔
425
    }
4✔
426

427
    /// See [`BlockReader::path`].
428
    ///
429
    /// [`BlockReader::path`]: crate::readers::blockreader::BlockReader#method.path
430
    #[inline(always)]
431
    #[allow(dead_code)]
432
    pub const fn path(&self) -> &FPath {
4✔
433
        self.syslinereader.path()
4✔
434
    }
4✔
435

436
    /// See [`BlockReader::block_offset_at_file_offset`].
437
    ///
438
    /// [`BlockReader::block_offset_at_file_offset`]: crate::readers::blockreader::BlockReader#method.block_offset_at_file_offset
439
    #[allow(dead_code)]
440
    pub const fn block_offset_at_file_offset(
×
441
        &self,
×
442
        fileoffset: FileOffset,
×
443
    ) -> BlockOffset {
×
444
        self.syslinereader
×
445
            .block_offset_at_file_offset(fileoffset)
×
446
    }
×
447

448
    /// See [`BlockReader::file_offset_at_block_offset`].
449
    ///
450
    /// [`BlockReader::file_offset_at_block_offset`]: crate::readers::blockreader::BlockReader#method.file_offset_at_block_offset
451
    #[allow(dead_code)]
452
    pub const fn file_offset_at_block_offset(
×
453
        &self,
×
454
        blockoffset: BlockOffset,
×
455
    ) -> FileOffset {
×
456
        self.syslinereader
×
457
            .file_offset_at_block_offset(blockoffset)
×
458
    }
×
459

460
    /// See [`BlockReader::file_offset_at_block_offset_index`].
461
    ///
462
    /// [`BlockReader::file_offset_at_block_offset_index`]: crate::readers::blockreader::BlockReader#method.file_offset_at_block_offset_index
463
    #[allow(dead_code)]
464
    pub const fn file_offset_at_block_offset_index(
×
465
        &self,
×
466
        blockoffset: BlockOffset,
×
467
        blockindex: BlockIndex,
×
468
    ) -> FileOffset {
×
469
        self.syslinereader
×
470
            .file_offset_at_block_offset_index(blockoffset, blockindex)
×
471
    }
×
472

473
    /// See [`BlockReader::block_index_at_file_offset`].
474
    ///
475
    /// [`BlockReader::block_index_at_file_offset`]: crate::readers::blockreader::BlockReader#method.block_index_at_file_offset
476
    #[allow(dead_code)]
477
    pub const fn block_index_at_file_offset(
×
478
        &self,
×
479
        fileoffset: FileOffset,
×
480
    ) -> BlockIndex {
×
481
        self.syslinereader
×
482
            .block_index_at_file_offset(fileoffset)
×
483
    }
×
484

485
    /// See [`BlockReader::count_blocks`].
486
    ///
487
    /// [`BlockReader::count_blocks`]: crate::readers::blockreader::BlockReader#method.count_blocks
488
    #[allow(dead_code)]
489
    pub const fn count_blocks(&self) -> Count {
×
490
        self.syslinereader
×
491
            .count_blocks()
×
492
    }
×
493

494
    /// See [`BlockReader::blockoffset_last`].
495
    ///
496
    /// [`BlockReader::blockoffset_last`]: crate::readers::blockreader::BlockReader#method.blockoffset_last
497
    #[allow(dead_code)]
498
    pub const fn blockoffset_last(&self) -> BlockOffset {
×
499
        self.syslinereader
×
500
            .blockoffset_last()
×
501
    }
×
502

503
    /// See [`BlockReader::fileoffset_last`].
504
    ///
505
    /// [`BlockReader::fileoffset_last`]: crate::readers::blockreader::BlockReader#method.fileoffset_last
506
    pub const fn fileoffset_last(&self) -> FileOffset {
5✔
507
        self.syslinereader
5✔
508
            .fileoffset_last()
5✔
509
    }
5✔
510

511
    /// See [`LineReader::charsz`].
512
    ///
513
    /// [`LineReader::charsz`]: crate::readers::linereader::LineReader#method.charsz
514
    #[allow(dead_code)]
515
    pub const fn charsz(&self) -> usize {
5✔
516
        self.syslinereader.charsz()
5✔
517
    }
5✔
518

519
    /// See [`BlockReader::mtime`].
520
    ///
521
    /// [`BlockReader::mtime`]: crate::readers::blockreader::BlockReader#method.mtime
522
    pub fn mtime(&self) -> SystemTime {
4✔
523
        self.syslinereader.mtime()
4✔
524
    }
4✔
525

526
    /// Did this `SyslogProcessor` run `process_missing_year()` ?
527
    fn did_process_missing_year(&self) -> bool {
5✔
528
        self.missing_year.is_some()
5✔
529
    }
5✔
530

531
    /// Did this `SyslogProcessor` run `process_uptime()` ?
532
    fn did_process_uptime(&self) -> bool {
×
533
        self.systemtime_at_uptime_zero().is_some()
×
534
    }
×
535

536
    /// Return `drop_data` value.
537
    pub const fn is_drop_data(&self) -> bool {
38✔
538
        self.syslinereader.is_drop_data()
38✔
539
    }
38✔
540

541
    /// store an `Error` that occurred. For later printing during `--summary`.
542
    // XXX: duplicates `FixedStructReader.set_error`
543
    fn set_error(
×
544
        &mut self,
×
545
        error: &Error,
×
546
    ) {
×
547
        def1ñ!("{:?}", error);
×
548
        let mut error_string: String = error.kind().to_string();
×
549
        error_string.push_str(": ");
×
550
        error_string.push_str(error.kind().to_string().as_str());
×
551
        // print the error but avoid printing the same error more than once
552
        // XXX: This is somewhat a hack as it's possible the same error, with the
553
        //      the same error message, could occur more than once.
554
        //      Considered another way, this function `set_error` may get called
555
        //      too often. The responsibility for calling `set_error` is haphazard.
556
        match &self.error {
×
557
            Some(err_s) => {
×
558
                if err_s != &error_string {
×
559
                    e_err!("{}", error);
×
560
                }
×
561
            }
562
            None => {
×
563
                e_err!("{}", error);
×
564
            }
×
565
        }
566
        if let Some(ref _err) = self.error {
×
567
            de_wrn!("skip overwrite of previous Error {:?} with Error ({:?})", _err, error);
×
568
            return;
×
569
        }
×
570
        self.error = Some(error_string);
×
571
    }
×
572

573
    /// Syslog files wherein the datetime format that does not include a year
574
    /// must have special handling.
575
    ///
576
    /// The last [`Sysline`] in the file is presumed to share the same year as
577
    /// the `mtime` (stored by the underlying [`BlockReader`] instance).
578
    /// The entire file is read from end to beginning (in reverse) (unless
579
    /// a `filter_dt_after_opt` is passed that coincides with the found
580
    /// syslines). The year is tracked and updated for each sysline.
581
    /// If there is jump backwards in time, that is presumed to be a
582
    /// year changeover.
583
    ///
584
    /// For example, given syslog contents
585
    ///
586
    /// ```text
587
    /// Nov 1 12:00:00 hello
588
    /// Dec 1 12:00:00 good morning
589
    /// Jan 1 12:00:00 goodbye
590
    /// ```
591
    ///
592
    /// and file `mtime` that is datetime _January 1 12:00:00 2015_,
593
    /// then the last `Sysline` "Jan 1 12:00:00 goodbye" is presumed to be in
594
    /// year 2015.
595
    /// The preceding `Sysline` "Dec 1 12:00:00 goodbye" is then processed.
596
    /// An apparent backwards jump is seen _Jan 1_ to _Dec 1_.
597
    /// From this, it can be concluded the _Dec 1_ refers to a prior year, 2014.
598
    ///
599
    /// Typically, when a datetime filter is passed, a special binary search is
600
    /// done to find the desired syslog line, reducing resource usage. Whereas,
601
    /// files processed here must be read linearly and in their entirety
602
    /// Or, if `filter_dt_after_opt` is passed then the file is read to the
603
    /// first `sysline.dt()` (datetime) that is
604
    /// `Result_Filter_DateTime1::OccursBefore` the
605
    /// `filter_dt_after_opt`.
606
    ///
607
    /// [`Sysline`]: crate::data::sysline::Sysline
608
    /// [`BlockReader`]: crate::readers::blockreader::BlockReader
609
    /// [`DateTimeL`]: crate::data::datetime::DateTimeL
610
    // BUG: does not revise year guesstimation based on encountering leap date February 29
611
    //      See Issue #245
612
    pub fn process_missing_year(
5✔
613
        &mut self,
5✔
614
        mtime: SystemTime,
5✔
615
        filter_dt_after_opt: &DateTimeLOpt,
5✔
616
    ) -> FileProcessingResultBlockZero {
5✔
617
        defn!("({:?}, {:?})", mtime, filter_dt_after_opt);
5✔
618
        debug_assert!(!self.did_process_missing_year(), "process_missing_year() must only be called once");
5✔
619
        let dt_mtime: DateTimeL = systemtime_to_datetime(&self.tz_offset, &mtime);
5✔
620
        defo!("converted dt_mtime {:?}", dt_mtime);
5✔
621
        let year: Year = dt_mtime.date_naive().year() as Year;
5✔
622
        self.missing_year = Some(year);
5✔
623
        defo!("converted missing_year {:?}", self.missing_year);
5✔
624
        let mut year_opt: Option<Year> = Some(year);
5✔
625
        defo!("year_opt {:?}", year_opt);
5✔
626
        let charsz_fo: FileOffset = self.charsz() as FileOffset;
5✔
627

628
        // The previously stored `Sysline`s have a filler year that is most likely incorrect.
629
        // The underlying `Sysline` instance cannot be updated behind an `Arc`.
630
        // Those syslines must be dropped and the entire file processed again.
631
        // However, underlying `Line` and `Block` are still valid; do not reprocess those.
632
        self.syslinereader
5✔
633
            .clear_syslines();
5✔
634

635
        // read all syslines in reverse
636
        let mut fo_prev: FileOffset = self.fileoffset_last();
5✔
637
        let mut syslinep_prev_opt: Option<SyslineP> = None;
5✔
638
        loop {
639
            let syslinep: SyslineP = match self
16✔
640
                .syslinereader
16✔
641
                .find_sysline_year(fo_prev, &year_opt)
16✔
642
            {
643
                ResultS3SyslineFind::Found((_fo, syslinep)) => {
16✔
644
                    defo!(
16✔
645
                        "Found {} Sysline @[{}, {}] datetime: {:?})",
16✔
646
                        _fo,
647
                        (*syslinep).fileoffset_begin(),
16✔
648
                        (*syslinep).fileoffset_end(),
16✔
649
                        (*syslinep).dt()
16✔
650
                    );
651
                    syslinep
16✔
652
                }
653
                ResultS3SyslineFind::Done => {
×
654
                    defo!("Done, break;");
×
655
                    break;
×
656
                }
657
                ResultS3SyslineFind::Err(err) => {
×
658
                    self.set_error(&err);
×
659
                    defx!("return FileErrIo({:?})", err);
×
660
                    return FileProcessingResultBlockZero::FileErrIoPath(err);
×
661
                }
662
            };
663
            // TODO: [2022/07/27] add fn `syslinereader.find_sysline_year_rev` to hide these char offset
664
            //       details (put them into a struct that is meant to understand these details)
665
            let fo_prev_prev: FileOffset = fo_prev;
16✔
666
            fo_prev = (*syslinep).fileoffset_begin();
16✔
667
            // check if datetime has suddenly jumped backwards.
668
            // if date has jumped backwards, then remove sysline, update the year, and process the file
669
            // from that fileoffset again
670
            match syslinep_prev_opt {
16✔
671
                Some(syslinep_prev) => {
11✔
672
                    // normally `dt_cur` should have a datetime *before or equal* to `dt_prev`
673
                    // but if not, then there was probably a year rollover
674
                    if (*syslinep).dt() > (*syslinep_prev).dt() {
11✔
675
                        let diff: Duration = *(*syslinep).dt() - *(*syslinep_prev).dt();
×
676
                        if diff > *BACKWARDS_TIME_JUMP_MEANS_NEW_YEAR {
×
677
                            year_opt = Some(year_opt.unwrap() - 1);
×
678
                            defo!("year_opt updated {:?}", year_opt);
×
679
                            self.syslinereader
×
680
                                .remove_sysline(fo_prev);
×
681
                            fo_prev = fo_prev_prev;
×
682
                            syslinep_prev_opt = Some(syslinep_prev.clone());
×
683
                            continue;
×
684
                        }
×
685
                    }
11✔
686
                }
687
                None => {}
5✔
688
            }
689
            if fo_prev < charsz_fo {
16✔
690
                defo!("fo_prev {} break;", fo_prev);
3✔
691
                // fileoffset is at the beginning of the file (or, cannot be moved back any more)
692
                break;
3✔
693
            }
13✔
694
            // if user-passed `--dt-after` and the sysline is prior to that filter then
695
            // stop processing
696
            match dt_after_or_before(syslinep.dt(), filter_dt_after_opt) {
13✔
697
                Result_Filter_DateTime1::OccursBefore => {
698
                    defo!("dt_after_or_before({:?},  {:?}) returned OccursBefore; break", syslinep.dt(), filter_dt_after_opt);
2✔
699
                    break;
2✔
700
                }
701
                Result_Filter_DateTime1::OccursAtOrAfter | Result_Filter_DateTime1::Pass => {},
11✔
702
            }
703
            // search for preceding sysline
704
            fo_prev -= charsz_fo;
11✔
705
            if fo_prev >= fo_prev_prev {
11✔
706
                // This will happen in case where the very first line of the file
707
                // holds a sysline with datetime pattern without a year, and that
708
                // sysline datetime pattern is different than all
709
                // proceeding syslines that have a year. (and it should only happen then)
710
                // Elicited by example in Issue #74
711
                de_err!("fo_prev {} ≥ {} fo_prev_prev, expected <; something is wrong", fo_prev, fo_prev_prev);
×
712
                // must break otherwise end up in an infinite loop
713
                break;
×
714
            }
11✔
715
            syslinep_prev_opt = Some(syslinep.clone());
11✔
716
        } // end loop
717
        defx!("return FileOk");
5✔
718

719
        FileProcessingResultBlockZero::FileOk
5✔
720
    }
5✔
721

722
    fn systemtime_at_uptime_zero(&self) -> Option<SystemTime>{
×
723
        self.syslinereader.systemtime_at_uptime_zero
×
724
    }
×
725

726
    pub fn process_uptime(
×
727
        &mut self,
×
728
    ) -> FileProcessingResultBlockZero {
×
729
        defn!();
×
730
        debug_assert!(!self.did_process_uptime(), "did_process_uptime() must only be called once");
×
731

732
        let fo_last = self.fileoffset_last();
×
733
        defo!("find_sysline(fo_last={})", fo_last);
×
734
        let syslinep = match self.find_sysline(fo_last) {
×
735
            ResultS3SyslineFind::Found((_fo, syslinep_)) => {
×
736
                defo!("found sysline at fo_last={} {:?}", fo_last, syslinep_);
×
737

738
                syslinep_
×
739
            }
740
            ResultS3SyslineFind::Done => {
×
741
                defx!("No sysline found");
×
742
                return FileProcessingResultBlockZero::FileErrNoSyslinesFound;
×
743
            }
744
            ResultS3SyslineFind::Err(err) => {
×
745
                defx!("error finding sysline: {:?}", err);
×
746
                return FileProcessingResultBlockZero::FileErrIo(err);
×
747
            }
748
        };
749
        let dt = syslinep.dt();
×
750
        let diff_ = datetime_minus_systemtime(&dt, &UPTIME_DEFAULT_OFFSET);
×
751
        defo!("diff_ from UPTIME_DEFAULT_OFFSET {:?}", diff_);
×
752
        let diff_secs = diff_.num_seconds();
×
753
        defo!("diff_secs {:?}", diff_secs);
×
754
        let mut diff_nanos = diff_.subsec_nanos();
×
755
        defo!("diff_nanos {:?}", diff_nanos);
×
756
        if diff_nanos < 0 {
×
757
            diff_nanos = 0;
×
758
        }
×
759
        let diffs: StdDuration = StdDuration::new(
×
760
            diff_secs as u64,
×
761
            diff_nanos as u32,
×
762
        );
763
        defo!("diffs {:?}", diffs);
×
764
        defo!("mtime()");
×
765
        let mtime = self.mtime();
×
766
        defo!("mtime {:?} (as DateTime {:?})",
×
767
              mtime, systemtime_to_datetime(&self.tz_offset, &mtime));
×
768
        // std::time::Duration is unsigned whereas chrono::Duration is signed.
769
        let st_at_zero = if diff_secs > 0 {
×
770
            defo!("checked_sub({:?})", diffs);
×
771
            match mtime.checked_sub(diffs) {
×
772
                Some(st) => st,
×
773
                None => {
774
                    defx!("failed to calculate systemtime at uptime zero");
×
775
                    return FileProcessingResultBlockZero::FileErrIo(std::io::Error::new(
×
776
                        std::io::ErrorKind::Other,
×
777
                        "failed to calculate systemtime at uptime zero",
×
778
                    ));
×
779
                }
780
            }
781
        } else {
782
            defo!("checked_add({:?})", diffs);
×
783
            match mtime.checked_add(diffs) {
×
784
                Some(st) => st,
×
785
                None => {
786
                    defx!("failed to calculate systemtime at uptime zero");
×
787
                    return FileProcessingResultBlockZero::FileErrIo(std::io::Error::new(
×
788
                        std::io::ErrorKind::Other,
×
789
                        "failed to calculate systemtime at uptime zero",
×
790
                    ));
×
791
                }
792
            }
793
        };
794
        self.syslinereader.systemtime_at_uptime_zero = Some(st_at_zero);
×
795
        defo!("systemtime_at_uptime_zero is  {:?}", self.syslinereader.systemtime_at_uptime_zero);
×
796
        #[cfg(debug_assertions)]
797
        {
798
            let d = systemtime_to_datetime(
×
799
                &self.tz_offset,
×
800
                &st_at_zero,
×
801
            );
802
            defo!("systemtime_at_uptime_zero as DateTime {:?}", d);
×
803
        }
804

805
        // The systemtime at uptime zero has been discovered.
806
        // So clear the lines that previously used the stand-in value for
807
        // `systemtime_at_uptime_zero`.
808
        self.syslinereader.clear_syslines();
×
809
        // The syslines gathered after this point will use the
810
        // correct `systemtime_at_uptime_zero`.
811

812
        defx!("return FileOk");
×
813

814
        FileProcessingResultBlockZero::FileOk
×
815
    }
×
816

817
    /// See [`SyslineReader::is_sysline_last`].
818
    ///
819
    /// [`SyslineReader::is_sysline_last`]: crate::readers::syslinereader::SyslineReader#method.is_sysline_last
820
    pub fn is_sysline_last(
×
821
        &self,
×
822
        syslinep: &SyslineP,
×
823
    ) -> bool {
×
824
        self.syslinereader
×
825
            .is_sysline_last(syslinep)
×
826
    }
×
827

828
    /// Try to `drop` data associated with the [`Block`] at [`BlockOffset`].
829
    /// This includes dropping associated [`Sysline`]s and [`Line`]s.
830
    /// This calls [`SyslineReader::drop_data`].
831
    ///
832
    /// _The caller must know what they are doing!_
833
    ///
834
    /// [`BlockOffset`]: crate::readers::blockreader::BlockOffset
835
    /// [`Sysline`]: crate::data::sysline::Sysline
836
    /// [`Line`]: crate::data::line::Line
837
    /// [`Block`]: crate::readers::blockreader::Block
838
    pub fn drop_data(
14✔
839
        &mut self,
14✔
840
        blockoffset: BlockOffset,
14✔
841
    ) -> bool {
14✔
842
        def1n!("({})", blockoffset);
14✔
843
        self.assert_stage(ProcessingStage::Stage3StreamSyslines);
14✔
844

845
        if ! self.is_drop_data() {
14✔
846
            def1x!("return false; is_drop_data() is false");
×
847
            return false;
×
848
        }
14✔
849

850
        // `syslinereader.drop_data` is an expensive function, skip if possible.
851
        if blockoffset == self.drop_block_last {
14✔
852
            def1x!("({}) skip block, return true", blockoffset);
5✔
853
            return false;
5✔
854
        }
9✔
855

856
        if self
9✔
857
            .syslinereader
9✔
858
            .drop_data(blockoffset)
9✔
859
        {
860
            self.drop_block_last = blockoffset;
4✔
861
            def1x!("({}) return true", blockoffset);
4✔
862
            return true;
4✔
863
        }
5✔
864

865
        def1x!("({}) return false", blockoffset);
5✔
866
        false
5✔
867
    }
14✔
868

869
    /// Call [`drop_data`] for the data associated with the [`Block`]
870
    /// *preceding* the first block of the passed [`Sysline`].
871
    ///
872
    /// _The caller must know what they are doing!_
873
    ///
874
    /// [`drop_data`]: Self#method.drop_data
875
    /// [`Block`]: crate::readers::blockreader::Block
876
    /// [`Sysline`]: crate::data::sysline::Sysline
877
    pub fn drop_data_try(
24✔
878
        &mut self,
24✔
879
        syslinep: &SyslineP,
24✔
880
    ) -> bool {
24✔
881
        if !SyslogProcessor::STREAM_STAGE_DROP {
24✔
882
            de_wrn!("drop_data_try() called but SyslogProcessor::STREAM_STAGE_DROP is false");
×
883
            return false;
×
884
        }
24✔
885
        if !self.is_drop_data() {
24✔
886
            def1ñ!("is_drop_data() is false; return false");
×
887
            return false;
×
888
        }
24✔
889

890
        let bo_first: BlockOffset = (*syslinep).blockoffset_first();
24✔
891
        if bo_first > 1 {
24✔
892
            def1ñ!();
14✔
893
            return self.drop_data(bo_first - 2);
14✔
894
        }
10✔
895

896
        false
10✔
897
    }
24✔
898

899
    /// Calls [`self.syslinereader.find_sysline(fileoffset)`],
900
    /// and in some cases calls private function `drop_block` to drop
901
    /// previously processed [`Sysline`], [`Line`], and [`Block`s].
902
    ///
903
    /// This is what implements the "streaming" in "[streaming stage]".
904
    ///
905
    /// [`self.syslinereader.find_sysline(fileoffset)`]: crate::readers::syslinereader::SyslineReader#method.find_sysline
906
    /// [`Block`s]: crate::readers::blockreader::Block
907
    /// [`Line`]: crate::data::line::Line
908
    /// [`Sysline`]: crate::data::sysline::Sysline
909
    /// [streaming stage]: crate::readers::syslogprocessor::ProcessingStage#variant.Stage3StreamSyslines
910
    pub fn find_sysline(
88✔
911
        &mut self,
88✔
912
        fileoffset: FileOffset,
88✔
913
    ) -> ResultS3SyslineFind {
88✔
914
        defn!("({})", fileoffset);
88✔
915
        let result: ResultS3SyslineFind = self
88✔
916
            .syslinereader
88✔
917
            .find_sysline(fileoffset);
88✔
918
        match result {
88✔
919
            ResultS3SyslineFind::Found(_) => {}
74✔
920
            ResultS3SyslineFind::Done => {}
14✔
921
            ResultS3SyslineFind::Err(ref err) => {
×
922
                self.set_error(err);
×
923
            }
×
924
        }
925
        defx!();
88✔
926

927
        result
88✔
928
    }
88✔
929

930
    /// Wrapper function for [`SyslineReader::find_sysline_between_datetime_filters`].
931
    /// Keeps a custom copy of any returned `Error` at `self.error`.
932
    ///
933
    /// [`SyslineReader::find_sysline_between_datetime_filters`]: crate::readers::syslinereader::SyslineReader#method.find_sysline_between_datetime_filters
934
    //
935
    // TODO: [2022/06/20] the `find` functions need consistent naming,
936
    //       `find_next`, `find_between`, `find_…` . The current design has
937
    //       the public-facing `find_` functions falling back on potential file-wide binary-search
938
    //       The binary-search only needs to be done during the stage 2. During stage 3, a simpler
939
    //       linear sequential search is more suitable, and more intuitive.
940
    //       More refactoring is in order.
941
    //       Also, a linear search can better detect rollover (i.e. when sysline datetime is missing year).
942
    // TODO: [2023/03/06] add stats tracking in `find` functions for number of
943
    //       "jumps" or bounces or fileoffset changes to confirm big-O
944
    #[inline(always)]
945
    pub fn find_sysline_between_datetime_filters(
5✔
946
        &mut self,
5✔
947
        fileoffset: FileOffset,
5✔
948
    ) -> ResultS3SyslineFind {
5✔
949
        defn!("({})", fileoffset);
5✔
950

951
        let result = match self
5✔
952
            .syslinereader
5✔
953
            .find_sysline_between_datetime_filters(
5✔
954
                fileoffset,
5✔
955
                &self.filter_dt_after_opt,
5✔
956
                &self.filter_dt_before_opt,
5✔
957
            ) {
5✔
958
            ResultS3SyslineFind::Err(err) => {
×
959
                self.set_error(&err);
×
960

961
                ResultS3SyslineFind::Err(err)
×
962
            }
963
            val => val,
5✔
964
        };
965

966
        defx!("({})", fileoffset);
5✔
967

968
        result
5✔
969
    }
5✔
970

971
    /// Wrapper function for a recurring sanity check.
972
    ///
973
    /// Good for checking functions `process_stage…` are called in
974
    /// the correct order.
975
    // XXX: is there a rust-ic way to enforce stage procession behavior
976
    //      at compile-time? It's a fairly simple enumerated type. Could a
977
    //      `match` tree (or something like that) be used?
978
    //      run-time checks of rust enum values seems hacky.
979
    #[inline(always)]
980
    fn assert_stage(
469✔
981
        &self,
469✔
982
        stage_expact: ProcessingStage,
469✔
983
    ) {
469✔
984
        debug_assert_eq!(
469✔
985
            self.processingstage, stage_expact,
986
            "Unexpected Processing Stage {:?}, expected Processing Stage {:?}",
×
987
            self.processingstage, stage_expact,
988
        );
989
    }
469✔
990

991
    /// Stage 0 does some sanity checks on the file.
992
    // TODO: this is redundant and has already been performed by functions in
993
    //       `filepreprocessor` and `BlockReader::new`.
994
    pub fn process_stage0_valid_file_check(&mut self) -> FileProcessingResultBlockZero {
80✔
995
        defn!();
80✔
996
        // sanity check calls are in correct order
997
        self.assert_stage(ProcessingStage::Stage0ValidFileCheck);
80✔
998
        self.processingstage = ProcessingStage::Stage0ValidFileCheck;
80✔
999

1000
        if self.filesz() == 0 {
80✔
1001
            defx!("filesz 0; return {:?}", FileProcessingResultBlockZero::FileErrEmpty);
2✔
1002
            return FileProcessingResultBlockZero::FileErrEmpty;
2✔
1003
        }
78✔
1004
        defx!("return {:?}", FileProcessingResultBlockZero::FileOk);
78✔
1005

1006
        FileProcessingResultBlockZero::FileOk
78✔
1007
    }
80✔
1008

1009
    /// Stage 1: Can [`Line`s] and [`Sysline`s] be parsed from the first block
1010
    /// (block zero)?
1011
    ///
1012
    /// [`Sysline`s]: crate::data::sysline::Sysline
1013
    /// [`Line`s]: crate::data::line::Line
1014
    pub fn process_stage1_blockzero_analysis(&mut self) -> FileProcessingResultBlockZero {
77✔
1015
        defn!();
77✔
1016
        self.assert_stage(ProcessingStage::Stage0ValidFileCheck);
77✔
1017
        self.processingstage = ProcessingStage::Stage1BlockzeroAnalysis;
77✔
1018

1019
        let result: FileProcessingResultBlockZero = self.blockzero_analysis();
77✔
1020
        // stored syslines may be zero if a "partial" `Line` was examined
1021
        // e.g. an incomplete and temporary `Line` instance was examined.
1022
        defo!(
77✔
1023
            "blockzero_analysis() stored syslines {}",
77✔
1024
            self.syslinereader
77✔
1025
                .count_syslines_stored()
77✔
1026
        );
1027
        match result {
77✔
1028
            FileProcessingResult::FileOk => {}
39✔
1029
            // skip further processing if not `FileOk`
1030
            _ => {
1031
                defx!("return {:?}", result);
38✔
1032
                return result;
38✔
1033
            }
1034
        }
1035

1036
        defx!("return {:?}", result);
39✔
1037

1038
        result
39✔
1039
    }
77✔
1040

1041
    /// Stage 2: Given an optional datetime filter (user-passed
1042
    /// `--dt-after`), can a log message with a datetime after that filter be
1043
    /// found?
1044
    pub fn process_stage2_find_dt(
8✔
1045
        &mut self,
8✔
1046
        filter_dt_after_opt: &DateTimeLOpt,
8✔
1047
    ) -> FileProcessingResultBlockZero {
8✔
1048
        defn!();
8✔
1049
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
8✔
1050
        self.processingstage = ProcessingStage::Stage2FindDt;
8✔
1051

1052
        // datetime formats without a year requires special handling
1053
        if !self.syslinereader.dt_pattern_has_year() &&
8✔
1054
            !self.syslinereader.dt_pattern_uptime()
4✔
1055
        {
1056
            defo!("!dt_pattern_has_year() && !dt_pattern_uptime()");
4✔
1057
            let mtime: SystemTime = self.mtime();
4✔
1058
            match self.process_missing_year(mtime, filter_dt_after_opt) {
4✔
1059
                FileProcessingResultBlockZero::FileOk => {}
4✔
1060
                result => {
×
1061
                    defx!("Bad result {:?}", result);
×
1062
                    return result;
×
1063
                }
1064
            }
1065
        } else if self.syslinereader.dt_pattern_uptime() {
4✔
1066
            defo!("dt_pattern_uptime()");
×
1067
            match self.process_uptime() {
×
1068
                FileProcessingResultBlockZero::FileOk => {}
×
1069
                result => {
×
1070
                    defx!("Bad result {:?}", result);
×
1071
                    return result;
×
1072
                }
1073
            }
1074
        }
4✔
1075

1076
        defx!();
8✔
1077

1078
        FileProcessingResultBlockZero::FileOk
8✔
1079
    }
8✔
1080

1081
    /// Stage 3: during "[streaming]", processed and printed data stored by
1082
    /// underlying "Readers" is proactively dropped
1083
    /// (removed from process memory).
1084
    ///
1085
    /// Also see [`find_sysline`].
1086
    ///
1087
    /// [streaming]: ProcessingStage#variant.Stage3StreamSyslines
1088
    /// [`find_sysline`]: self::SyslogProcessor#method.find_sysline
1089
    pub fn process_stage3_stream_syslines(&mut self) -> FileProcessingResultBlockZero {
4✔
1090
        defñ!();
4✔
1091
        self.assert_stage(ProcessingStage::Stage2FindDt);
4✔
1092
        self.processingstage = ProcessingStage::Stage3StreamSyslines;
4✔
1093

1094
        FileProcessingResultBlockZero::FileOk
4✔
1095
    }
4✔
1096

1097
    /// Stage 4: no more [`Sysline`s] to process. Create and return a
1098
    /// [`Summary`].
1099
    ///
1100
    /// [`Summary`]: crate::readers::summary::Summary
1101
    /// [`Sysline`s]: crate::data::sysline::Sysline
1102
    pub fn process_stage4_summary(&mut self) -> Summary {
1✔
1103
        defñ!();
1✔
1104
        // XXX: this can be called from various stages, no need to assert
1105
        self.processingstage = ProcessingStage::Stage4Summary;
1✔
1106

1107
        self.summary_complete()
1✔
1108
    }
1✔
1109

1110
    /// Review bytes in the first block ("zero block").
1111
    /// If the block has enough bytes and does not begin with only null bytes
    /// then return [`FileOk`] else return an error result
    /// (e.g. `FileErrTooSmall` or `FileErrNullBytes`).
1113
    ///
1114
    /// [`FileOk`]: self::FileProcessingResultBlockZero
1115
    /// [`FileErrNoLinesFound`]: self::FileProcessingResultBlockZero
1116
    pub(super) fn blockzero_analysis_bytes(&mut self) -> FileProcessingResultBlockZero {
77✔
1117
        defn!();
77✔
1118
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
77✔
1119

1120
        let blockp: BlockP = match self
77✔
1121
            .syslinereader
77✔
1122
            .linereader
77✔
1123
            .blockreader
77✔
1124
            .read_block(0)
77✔
1125
        {
1126
            ResultS3ReadBlock::Found(blockp_) => blockp_,
77✔
1127
            ResultS3ReadBlock::Done => {
×
1128
                defx!("return FileErrEmpty");
×
1129
                return FileProcessingResultBlockZero::FileErrEmpty;
×
1130
            }
1131
            ResultS3ReadBlock::Err(err) => {
×
1132
                self.set_error(&err);
×
1133
                defx!("return FileErrIo({:?})", err);
×
1134
                return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1135
            }
1136
        };
1137
        // if the first block is too small then there will not be enough
1138
        // data to parse a `Line` or `Sysline`
1139
        let blocksz0: BlockSz = (*blockp).len() as BlockSz;
77✔
1140
        let require_sz: BlockSz = std::cmp::min(Self::BLOCKZERO_ANALYSIS_BYTES_MIN, self.blocksz());
77✔
1141
        defo!("blocksz0 {} < {} require_sz", blocksz0, require_sz);
77✔
1142
        if blocksz0 < require_sz {
77✔
1143
            defx!("return FileErrTooSmall");
9✔
1144
            return FileProcessingResultBlockZero::FileErrTooSmall;
9✔
1145
        }
68✔
1146
        // if the first `BLOCKZERO_ANALYSIS_BYTES_NULL_MAX` bytes are all
1147
        // zero then this is not a text file and processing should stop.
1148
        if (*blockp).iter().take(Self::BLOCKZERO_ANALYSIS_BYTES_NULL_MAX).all(|&b| b == 0) {
210✔
1149
            defx!("return FileErrNullBytes");
2✔
1150
            return FileProcessingResultBlockZero::FileErrNullBytes;
2✔
1151
        }
66✔
1152

1153
        defx!("return FileOk");
66✔
1154

1155
        FileProcessingResultBlockZero::FileOk
66✔
1156
    }
77✔
1157

1158
    /// Attempt to find a minimum number of [`Line`s] within the first block
1159
    /// (block zero).
1160
    /// If enough `Line` found then return [`FileOk`]
1161
    /// else return [`FileErrNoLinesFound`].
1162
    ///
1163
    /// [`Line`s]: crate::data::line::Line
1164
    /// [`FileOk`]: self::FileProcessingResultBlockZero
1165
    /// [`FileErrNoLinesFound`]: self::FileProcessingResultBlockZero
1166
    pub(super) fn blockzero_analysis_lines(&mut self) -> FileProcessingResultBlockZero {
66✔
1167
        defn!();
66✔
1168
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
66✔
1169

1170
        let blockp: BlockP = match self
66✔
1171
            .syslinereader
66✔
1172
            .linereader
66✔
1173
            .blockreader
66✔
1174
            .read_block(0)
66✔
1175
        {
1176
            ResultS3ReadBlock::Found(blockp_) => blockp_,
66✔
1177
            ResultS3ReadBlock::Done => {
×
1178
                defx!("return FileErrEmpty");
×
1179
                return FileProcessingResultBlockZero::FileErrEmpty;
×
1180
            }
1181
            ResultS3ReadBlock::Err(err) => {
×
1182
                self.set_error(&err);
×
1183
                defx!("return FileErrIo({:?})", err);
×
1184
                return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1185
            }
1186
        };
1187
        let blocksz0: BlockSz = (*blockp).len() as BlockSz;
66✔
1188
        let mut _partial_found = false;
66✔
1189
        let mut fo: FileOffset = 0;
66✔
1190
        // how many lines have been found?
1191
        let mut found: Count = 0;
66✔
1192
        // must find at least this many lines in block zero to be FileOk
1193
        let found_min: Count = *BLOCKZERO_ANALYSIS_LINE_COUNT_MIN_MAP
66✔
1194
            .get(&blocksz0)
66✔
1195
            .unwrap();
66✔
1196
        defx!("block zero blocksz {} found_min {}", blocksz0, found_min);
66✔
1197
        // find `found_min` Lines or whatever can be found within block 0
1198
        while found < found_min {
101✔
1199
            fo = match self
66✔
1200
                .syslinereader
66✔
1201
                .linereader
66✔
1202
                .find_line_in_block(fo)
66✔
1203
            {
1204
                (ResultS3LineFind::Found((fo_next, _linep)), _) => {
39✔
1205
                    found += 1;
39✔
1206

1207
                    fo_next
39✔
1208
                }
1209
                (ResultS3LineFind::Done, partial) => {
27✔
1210
                    match partial {
27✔
1211
                        Some(_) => {
27✔
1212
                            found += 1;
27✔
1213
                            _partial_found = true;
27✔
1214
                        },
27✔
1215
                        None => {}
×
1216
                    }
1217
                    break;
27✔
1218
                }
1219
                (ResultS3LineFind::Err(err), _) => {
×
1220
                    self.set_error(&err);
×
1221
                    defx!("return FileErrIo({:?})", err);
×
1222
                    return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1223
                }
1224
            };
1225
            if 0 != self
39✔
1226
                .syslinereader
39✔
1227
                .linereader
39✔
1228
                .block_offset_at_file_offset(fo)
39✔
1229
            {
1230
                break;
4✔
1231
            }
35✔
1232
        }
1233

1234
        let fpr: FileProcessingResultBlockZero = match found >= found_min {
66✔
1235
            true => FileProcessingResultBlockZero::FileOk,
66✔
1236
            false => FileProcessingResultBlockZero::FileErrNoLinesFound,
×
1237
        };
1238

1239
        defx!("found {} lines, partial_found {}, require {} lines, return {:?}", found, _partial_found, found_min, fpr);
66✔
1240

1241
        fpr
66✔
1242
    }
66✔
1243

1244
    /// Attempt to find a minimum number of [`Sysline`] within the first block.
1245
    /// If enough `Sysline` found then return [`FileOk`]
1246
    /// else return [`FileErrNoSyslinesFound`].
1247
    ///
1248
    /// [`Sysline`]: crate::data::sysline::Sysline
1249
    /// [`FileOk`]: self::FileProcessingResultBlockZero
1250
    /// [`FileErrNoSyslinesFound`]: self::FileProcessingResultBlockZero
1251
    pub(super) fn blockzero_analysis_syslines(&mut self) -> FileProcessingResultBlockZero {
66✔
1252
        defn!();
66✔
1253
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
66✔
1254

1255
        let blockp: BlockP = match self
66✔
1256
            .syslinereader
66✔
1257
            .linereader
66✔
1258
            .blockreader
66✔
1259
            .read_block(0)
66✔
1260
        {
1261
            ResultS3ReadBlock::Found(blockp_) => blockp_,
66✔
1262
            ResultS3ReadBlock::Done => {
×
1263
                defx!("return FileErrEmpty");
×
1264
                return FileProcessingResultBlockZero::FileErrEmpty;
×
1265
            }
1266
            ResultS3ReadBlock::Err(err) => {
×
1267
                self.set_error(&err);
×
1268
                defx!("return FileErrIo({:?})", err);
×
1269
                return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1270
            }
1271
        };
1272
        let blocksz0: BlockSz = (*blockp).len() as BlockSz;
66✔
1273
        let mut fo: FileOffset = 0;
66✔
1274
        // how many syslines have been found?
1275
        let mut found: Count = 0;
66✔
1276
        // must find at least this many syslines in block zero to be FileOk
1277
        let found_min: Count = *BLOCKZERO_ANALYSIS_SYSLINE_COUNT_MIN_MAP
66✔
1278
            .get(&blocksz0)
66✔
1279
            .unwrap();
66✔
1280
        defo!("block zero blocksz {} found_min {:?}", blocksz0, found_min);
66✔
1281

1282
        // find `at_max` Syslines within block zero
1283
        while found < found_min
97✔
1284
            && self.syslinereader.block_offset_at_file_offset(fo) == 0
66✔
1285
        {
1286
            fo = match self
66✔
1287
                .syslinereader
66✔
1288
                .find_sysline_in_block(fo)
66✔
1289
            {
1290
                (ResultS3SyslineFind::Found((fo_next, _slinep)), _) => {
31✔
1291
                    found += 1;
31✔
1292
                    defo!("Found; found {} syslines, fo_next {}", found, fo_next);
31✔
1293

1294
                    fo_next
31✔
1295
                }
1296
                (ResultS3SyslineFind::Done, partial_found) => {
35✔
1297
                    defo!("Done; found {} syslines, partial_found {}", found, partial_found);
35✔
1298
                    if partial_found {
35✔
1299
                        found += 1;
8✔
1300
                    }
27✔
1301
                    break;
35✔
1302
                }
1303
                (ResultS3SyslineFind::Err(err), _) => {
×
1304
                    self.set_error(&err);
×
1305
                    defx!("return FileErrIo({:?})", err);
×
1306
                    return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1307
                }
1308
            };
1309
        }
1310

1311
        if found == 0 {
66✔
1312
            defx!("found {} syslines, require {} syslines, return FileErrNoSyslinesFound", found, found_min);
27✔
1313
            return FileProcessingResultBlockZero::FileErrNoSyslinesFound;
27✔
1314
        }
39✔
1315

1316
        let patt_count_a = self.syslinereader.dt_patterns_counts_in_use();
39✔
1317
        defo!("dt_patterns_counts_in_use {}", patt_count_a);
39✔
1318

1319
        if !self.syslinereader.dt_patterns_analysis() {
39✔
1320
            de_err!("dt_patterns_analysis() failed which is unexpected; return FileErrNoSyslinesFound");
×
1321
            return FileProcessingResultBlockZero::FileErrNoSyslinesFound;
×
1322
        }
39✔
1323

1324
        let _patt_count_b = self.syslinereader.dt_patterns_counts_in_use();
39✔
1325
        debug_assert_eq!(
39✔
1326
            _patt_count_b,
1327
            SyslogProcessor::DT_PATTERN_MAX,
1328
            "expected patterns to be reduced to {}, found {:?}",
×
1329
            SyslogProcessor::DT_PATTERN_MAX,
1330
            _patt_count_b,
1331
        );
1332

1333
        // if more than one `DateTimeParseInstr` was used then the syslines
1334
        // must be reparsed using the one chosen `DateTimeParseInstr`
1335
        if patt_count_a > 1 {
39✔
1336
            defo!("must reprocess all syslines using limited patterns (used {} DateTimeParseInstr; must only use {})!", patt_count_a, 1);
8✔
1337

1338
            self.syslinereader.clear_syslines();
8✔
1339
            // find `at_max` Syslines within block zero
1340
            found = 0;
8✔
1341
            fo = 0;
8✔
1342
            while found < found_min
11✔
1343
                && self.syslinereader.block_offset_at_file_offset(fo) == 0
8✔
1344
            {
1345
                fo = match self
8✔
1346
                    .syslinereader
8✔
1347
                    .find_sysline_in_block(fo)
8✔
1348
                {
1349
                    (ResultS3SyslineFind::Found((fo_next, _slinep)), _) => {
3✔
1350
                        found += 1;
3✔
1351
                        defo!("Found; found {} syslines, fo_next {}", found, fo_next);
3✔
1352

1353
                        fo_next
3✔
1354
                    }
1355
                    (ResultS3SyslineFind::Done, partial_found) => {
5✔
1356
                        defo!("Done; found {} syslines, partial_found {}", found, partial_found);
5✔
1357
                        if partial_found {
5✔
1358
                            found += 1;
5✔
1359
                        }
5✔
1360
                        break;
5✔
1361
                    }
1362
                    (ResultS3SyslineFind::Err(err), _) => {
×
1363
                        self.set_error(&err);
×
1364
                        defx!("return FileErrIo({:?})", err);
×
1365
                        return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1366
                    }
1367
                };
1368
            }
1369
            defo!("done reprocessing.");
8✔
1370
        } else {
1371
            defo!("no reprocess needed ({} DateTimeParseInstr)!", patt_count_a);
31✔
1372
        }
1373

1374
        let fpr: FileProcessingResultBlockZero = match found >= found_min {
39✔
1375
            true => FileProcessingResultBlockZero::FileOk,
39✔
1376
            false => FileProcessingResultBlockZero::FileErrNoSyslinesFound,
×
1377
        };
1378

1379
        // sanity check that only one `DateTimeParseInstr` is in use
1380
        if cfg!(debug_assertions) && self.syslinereader.dt_patterns_counts_in_use() != 1 {
39✔
1381
            de_wrn!(
×
1382
                "dt_patterns_counts_in_use() = {}, expected 1; for {:?}",
×
1383
                self.syslinereader.dt_patterns_counts_in_use(), self.path()
×
1384
            );
×
1385
        }
39✔
1386

1387
        if self.syslinereader.is_streamed_file()
39✔
1388
            && !self.syslinereader.dt_pattern_has_year()
×
1389
        {
1390
            self.syslinereader.linereader.blockreader.disable_drop_data();
×
1391
            debug_assert!(!self.is_drop_data(), "is_drop_data() should be false");
×
1392
        }
39✔
1393

1394
        defx!("found {} syslines, require {} syslines, return {:?}", found, found_min, fpr);
39✔
1395

1396
        fpr
39✔
1397
    }
66✔
1398

1399
    /// Call `self.blockzero_analysis_lines`.
1400
    /// If that passes then call `self.blockzero_analysis_syslines`.
1401
    pub(super) fn blockzero_analysis(&mut self) -> FileProcessingResultBlockZero {
77✔
1402
        defn!();
77✔
1403
        assert!(!self.blockzero_analysis_done, "blockzero_analysis_lines should only be completed once.");
77✔
1404
        self.blockzero_analysis_done = true;
77✔
1405
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
77✔
1406

1407
        if self.syslinereader.filesz() == 0 {
77✔
1408
            defx!("return FileErrEmpty");
×
1409
            return FileProcessingResultBlockZero::FileErrEmpty;
×
1410
        }
77✔
1411

1412
        let result: FileProcessingResultBlockZero = self.blockzero_analysis_bytes();
77✔
1413
        if !result.is_ok() {
77✔
1414
            defx!("syslinereader.blockzero_analysis_bytes() was !is_ok(), return {:?}", result);
11✔
1415
            return result;
11✔
1416
        };
66✔
1417

1418
        let result: FileProcessingResultBlockZero = self.blockzero_analysis_lines();
66✔
1419
        if !result.is_ok() {
66✔
1420
            defx!("syslinereader.blockzero_analysis() was !is_ok(), return {:?}", result);
×
1421
            return result;
×
1422
        };
66✔
1423

1424
        let result: FileProcessingResultBlockZero = self.blockzero_analysis_syslines();
66✔
1425
        defx!("return {:?}", result);
66✔
1426

1427
        result
66✔
1428
    }
77✔
1429

1430
    #[cfg(test)]
1431
    /// Return a clone of the `dropped_blocks` collection held by the
    /// underlying `blockreader`.
    pub(crate) fn dropped_blocks(&self) -> SetDroppedBlocks {
        let blockreader = &self.syslinereader.linereader.blockreader;

        blockreader.dropped_blocks.clone()
    }
3✔
1438

1439
    /// Test-only helper: return a clone of the `dropped_lines` field of the
    /// underlying `linereader`.
    #[cfg(test)]
    pub(crate) fn dropped_lines(&self) -> SetDroppedLines {
        let linereader = &self.syslinereader.linereader;

        linereader.dropped_lines.clone()
    }
3✔
1446

1447
    /// Test-only helper: return a clone of the `dropped_syslines` field of the
    /// underlying `syslinereader`.
    #[cfg(test)]
    pub(crate) fn dropped_syslines(&self) -> SetDroppedSyslines {
        let syslinereader = &self.syslinereader;

        syslinereader.dropped_syslines.clone()
    }
    }
3✔
1453

1454
    pub fn summary(&self) -> SummarySyslogProcessor {
7✔
1455
        let syslogprocessor_missing_year = self.missing_year;
7✔
1456

1457
        SummarySyslogProcessor {
7✔
1458
            syslogprocessor_missing_year,
7✔
1459
        }
7✔
1460
    }
7✔
1461

1462
    /// Return an up-to-date [`Summary`] instance for this `SyslogProcessor`.
1463
    ///
1464
    /// Probably not useful or interesting before
1465
    /// `ProcessingStage::Stage4Summary`.
1466
    ///
1467
    /// [`Summary`]: crate::readers::summary::Summary
1468
    pub fn summary_complete(&self) -> Summary {
4✔
1469
        let path = self.path().clone();
4✔
1470
        let path_ntf = None;
4✔
1471
        let filetype = self.filetype();
4✔
1472
        let logmessagetype = filetype.to_logmessagetype();
4✔
1473
        let summaryblockreader = self.syslinereader.linereader.blockreader.summary();
4✔
1474
        let summarylinereader = self.syslinereader.linereader.summary();
4✔
1475
        let summarysyslinereader = self.syslinereader.summary();
4✔
1476
        let summarysyslogprocessor = self.summary();
4✔
1477
        let error: Option<String> = self.error.clone();
4✔
1478

1479
        Summary::new(
4✔
1480
            path,
4✔
1481
            path_ntf,
4✔
1482
            filetype,
4✔
1483
            logmessagetype,
4✔
1484
            Some(summaryblockreader),
4✔
1485
            Some(summarylinereader),
4✔
1486
            Some(summarysyslinereader),
4✔
1487
            Some(summarysyslogprocessor),
4✔
1488
            None,
4✔
1489
            None,
4✔
1490
            None,
4✔
1491
            error,
4✔
1492
        )
1493
    }
4✔
1494
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc