• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jtmoon79 / super-speedy-syslog-searcher / 16737850320

05 Aug 2025 01:02AM UTC coverage: 58.48% (+0.1%) from 58.369%
16737850320

push

github

jtmoon79
(TOOLS) fix build of mimalloc jemalloc

12068 of 20636 relevant lines covered (58.48%)

22089.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.47
/src/readers/syslogprocessor.rs
1
// src/readers/syslogprocessor.rs
2
// …
3

4
//! Implements a [`SyslogProcessor`], the driver of the processing stages for
5
//! a "syslog" file using a [`SyslineReader`].
6
//!
7
//! A "syslog" file in this context means any text-based file with logged
8
//! messages with a datetime stamp.
9
//! The file may use a formally defined log message format (e.g. RFC 5424)
10
//! or an ad-hoc log message format (most log files).<br/>
11
//! The two common assumptions are that:
12
//! 1. each log message has a datetime stamp on the first line
13
//! 2. log messages are in chronological order
14
//!
15
//! Sibling of [`FixedStructReader`]. But far more complicated due to the
16
//! ad-hoc nature of log files.
17
//! 
18
//! This is an _s4lib_ structure used by the binary program _s4_.
19
//!
20
//! [`FixedStructReader`]: crate::readers::fixedstructreader::FixedStructReader
21
//! [`SyslineReader`]: crate::readers::syslinereader::SyslineReader
22
//! [`SyslogProcessor`]: SyslogProcessor
23

24
#![allow(non_snake_case)]
25

26
use crate::common::{
27
    Count,
28
    FPath,
29
    FileOffset,
30
    FileProcessingResult,
31
    FileSz,
32
    FileType,
33
    SYSLOG_SZ_MAX,
34
};
35
use crate::data::datetime::{
36
    dt_after_or_before,
37
    systemtime_to_datetime,
38
    DateTimeL,
39
    DateTimeLOpt,
40
    Duration,
41
    FixedOffset,
42
    Result_Filter_DateTime1,
43
    SystemTime,
44
    Year,
45
};
46
use crate::data::sysline::SyslineP;
47
use crate::{e_err, de_err, de_wrn};
48
use crate::readers::blockreader::{
49
    BlockIndex,
50
    BlockOffset,
51
    BlockP,
52
    BlockSz,
53
    ResultS3ReadBlock,
54
};
55
#[cfg(test)]
56
use crate::readers::blockreader::SetDroppedBlocks;
57
#[cfg(test)]
58
use crate::readers::linereader::SetDroppedLines;
59
#[cfg(test)]
60
use crate::readers::syslinereader::SetDroppedSyslines;
61
#[doc(hidden)]
62
pub use crate::readers::linereader::ResultS3LineFind;
63
#[doc(hidden)]
64
pub use crate::readers::syslinereader::{
65
    DateTimePatternCounts,
66
    ResultS3SyslineFind,
67
    SummarySyslineReader,
68
    SyslineReader,
69
};
70
use crate::readers::summary::Summary;
71

72
use std::fmt;
73
use std::fmt::Debug;
74
use std::io::{Error, ErrorKind, Result};
75

76
use ::chrono::Datelike;
77
use ::lazy_static::lazy_static;
78
use ::rangemap::RangeMap;
79
use ::si_trace_print::{def1n, def1x, def1ñ, defn, defo, defx, defñ};
80

81

82
// ---------------
83
// SyslogProcessor
84

85
/// `SYSLOG_SZ_MAX` as a `BlockSz`.
86
pub(crate) const SYSLOG_SZ_MAX_BSZ: BlockSz = SYSLOG_SZ_MAX as BlockSz;
87

88
/// Typed [`FileProcessingResult`] for "block zero analysis".
89
///
90
/// [`FileProcessingResult`]: crate::common::FileProcessingResult
91
pub type FileProcessingResultBlockZero = FileProcessingResult<std::io::Error>;
92

93
/// The processing stages of a [`SyslogProcessor`].
///
/// Every processed file moves forward through these stages in the order
/// declared here; some stages may be skipped.
///
/// [`SyslogProcessor`]: self::SyslogProcessor
#[derive(Debug, Eq, Ord, PartialEq, PartialOrd)]
pub enum ProcessingStage {
    /// Is the file present and of a type that can be parsed?
    Stage0ValidFileCheck,
    /// Attempt to parse the file to confirm it can be parsed, and determine
    /// the datetime patterns of any [`Sysline`s] found.<br/>
    /// When no `Sysline`s are found, advance to `Stage4Summary`.
    ///
    /// [`Sysline`s]: crate::data::sysline::Sysline
    Stage1BlockzeroAnalysis,
    /// Locate the first [`Sysline`] of the syslog file.<br/>
    /// When CLI option `--after` was passed, locate the first `Sysline` whose
    /// datetime is at or after the user-passed [`DateTimeL`].
    ///
    /// [`Sysline`]: crate::data::sysline::Sysline
    /// [`DateTimeL`]: crate::data::datetime::DateTimeL
    Stage2FindDt,
    /// Advance through the syslog file to its end.<br/>
    /// When CLI option `--before` was passed, process up to the last
    /// [`Sysline`] whose datetime is at or before the user-passed
    /// [`DateTimeL`]. Otherwise, process every remaining `Sysline`.
    ///
    /// While advancing, try to [`drop`] already-processed `Block`s,
    /// `Line`s, and `Sysline`s so less memory stays allocated.
    /// Also known as the "_streaming stage_".
    ///
    /// Also see function [`find_sysline`].
    ///
    /// [`Sysline`]: crate::data::sysline::Sysline
    /// [`DateTimeL`]: crate::data::datetime::DateTimeL
    /// [`find_sysline`]: self::SyslogProcessor#method.find_sysline
    /// [`drop`]: self::SyslogProcessor#method.drop_data_try
    Stage3StreamSyslines,
    /// When CLI option `--summary` was passed, print a summary of
    /// various information about the processed file.
    Stage4Summary,
}
134

135
/// [`BlockSz`] in a [`Range`].
136
///
137
/// [`Range`]: std::ops::Range
138
/// [`BlockSz`]: crate::readers::blockreader::BlockSz
139
type BszRange = std::ops::Range<BlockSz>;
140

141
/// Map [`BlockSz`] to a [`Count`].
142
///
143
/// [`BlockSz`]: crate::readers::blockreader::BlockSz
144
/// [`Count`]: crate::common::Count
145
type MapBszRangeToCount = RangeMap<u64, Count>;
146

147
lazy_static! {
    /// For files in `blockzero_analysis`, the number of [`Line`]s needed to
    /// be found within block zero. Keyed by ranges of `BlockSz`.
    ///
    /// [`Line`]: crate::data::line::Line
    pub static ref BLOCKZERO_ANALYSIS_LINE_COUNT_MIN_MAP: MapBszRangeToCount = {
        defñ!("lazy_static! BLOCKZERO_ANALYSIS_LINE_COUNT_MIN_MAP::new()");

        let mut m = MapBszRangeToCount::new();
        m.insert(BszRange{start: 0, end: SYSLOG_SZ_MAX_BSZ}, 1);
        m.insert(BszRange{start: SYSLOG_SZ_MAX_BSZ, end: SYSLOG_SZ_MAX_BSZ * 3}, 3);
        m.insert(BszRange{start: SYSLOG_SZ_MAX_BSZ * 3, end: BlockSz::MAX}, 3);

        m
    };

    /// For files in `blockzero_analysis`, the number of [`Sysline`]s needed to
    /// be found within block zero. Keyed by ranges of `BlockSz`.
    ///
    /// [`Sysline`]: crate::data::sysline::Sysline
    pub static ref BLOCKZERO_ANALYSIS_SYSLINE_COUNT_MIN_MAP: MapBszRangeToCount = {
        defñ!("lazy_static! BLOCKZERO_ANALYSIS_SYSLINE_COUNT_MIN_MAP::new()");

        let mut m = MapBszRangeToCount::new();
        m.insert(BszRange{start: 0, end: SYSLOG_SZ_MAX_BSZ}, 1);
        m.insert(BszRange{start: SYSLOG_SZ_MAX_BSZ, end: BlockSz::MAX}, 2);

        m
    };

    /// 25 hours.
    /// For processing syslog files without a year.
    /// If there is a datetime jump backwards more than this value then
    /// a year rollover happened.
    ///
    /// e.g. given log messages
    ///
    /// ```text
    /// Dec 31 23:59:59 [INFO] One!
    /// Jan 1 00:00:00 [INFO] Happy New Year!!!
    /// ```
    ///
    /// These messages interpreted as the same year would be a jump backwards
    /// in time.
    /// Of course, this apparent "jump backwards" means the year changed.
    // XXX: cannot make `const` because `secs` is a private field
    static ref BACKWARDS_TIME_JUMP_MEANS_NEW_YEAR: Duration =
        Duration::try_seconds(60 * 60 * 25).expect("25 hours must be a representable Duration");
}
191

192
/// The `SyslogProcessor` uses [`SyslineReader`] to find [`Sysline`s] in a file.
193
///
194
/// A `SyslogProcessor` has knowledge of:
195
/// - the different stages of processing a syslog file
196
/// - stores optional datetime filters and searches with them
197
/// - handles special cases of a syslog file with a datetime format without a
198
///   year
199
///
200
/// A `SyslogProcessor` is driven by a thread to fully process one syslog file.
201
///
202
/// During "[streaming stage]", the `SyslogProcessor` will proactively `drop`
203
/// data that has been processed and printed. It does so by calling
204
/// private function [`drop_data_try`] during function [`find_sysline`].
205
///
206
/// A `SyslogProcessor` presumes syslog messages are in chronological order.
207
///
208
/// [`Sysline`s]: crate::data::sysline::Sysline
209
/// [`SyslineReader`]: crate::readers::syslinereader::SyslineReader
210
/// [`LineReader`]: crate::readers::linereader::LineReader
211
/// [`BlockReader`]: crate::readers::blockreader::BlockReader
212
/// [`drop_data_try`]: self::SyslogProcessor#method.drop_data_try
213
/// [`find_sysline`]: self::SyslogProcessor#method.find_sysline
214
/// [streaming stage]: self::ProcessingStage#variant.Stage3StreamSyslines
215
pub struct SyslogProcessor {
216
    syslinereader: SyslineReader,
217
    /// Current `ProcessingStage`.
218
    processingstage: ProcessingStage,
219
    /// `FPath`.
220
    // TODO: remove this, use the `BlockReader` path, (DRY)
221
    path: FPath,
222
    // TODO: remove this, use the `BlockReader` blocksz, (DRY)
223
    blocksz: BlockSz,
224
    /// `FixedOffset` timezone for datetime formats without a timezone.
225
    tz_offset: FixedOffset,
226
    /// Optional filter, syslines _after_ this `DateTimeL`.
227
    filter_dt_after_opt: DateTimeLOpt,
228
    /// Optional filter, syslines _before_ this `DateTimeL`.
229
    filter_dt_before_opt: DateTimeLOpt,
230
    /// Internal sanity check, has `self.blockzero_analysis()` completed?
231
    blockzero_analysis_done: bool,
232
    /// Internal tracking of last `blockoffset` passed to `drop_block`.
233
    drop_block_last: BlockOffset,
234
    /// Optional `Year` value used to start `process_missing_year()`.
235
    /// Only needed for syslog files with datetime format without a year.
236
    missing_year: Option<Year>,
237
    /// The last [`Error`], if any, as a `String`. Set by [`set_error`].
238
    ///
239
    /// Annoyingly, cannot [Clone or Copy `Error`].
240
    ///
241
    /// [`Error`]: std::io::Error
242
    /// [Clone or Copy `Error`]: https://github.com/rust-lang/rust/issues/24135
243
    /// [`set_error`]: self::SyslogProcessor#method.set_error
244
    // TRACKING: https://github.com/rust-lang/rust/issues/24135
245
    error: Option<String>,
246
}
247

248
impl Debug for SyslogProcessor {
249
    fn fmt(
×
250
        &self,
×
251
        f: &mut fmt::Formatter,
×
252
    ) -> fmt::Result {
×
253
        f.debug_struct("SyslogProcessor")
×
254
            .field("Path", &self.path)
×
255
            .field("Processing Stage", &self.processingstage)
×
256
            .field("BlockSz", &self.blocksz)
×
257
            .field("TimeOffset", &self.tz_offset)
×
258
            .field("filter_dt_after_opt", &self.filter_dt_after_opt)
×
259
            .field("filter_dt_before_opt", &self.filter_dt_before_opt)
×
260
            .field("BO Analysis done?", &self.blockzero_analysis_done)
×
261
            .field("filetype", &self.filetype())
×
262
            .field("Reprocessed missing year?", &self.did_process_missing_year())
×
263
            .field("Missing Year", &self.missing_year)
×
264
            .field("Error?", &self.error)
×
265
            .finish()
×
266
    }
×
267
}
268

269
// TODO: [2023/04] remove redundant variable prefix name `syslogprocessor_`
270
#[derive(Clone, Debug, Default, Eq, PartialEq)]
271
pub struct SummarySyslogProcessor {
272
    /// `SyslogProcessor::missing_year`
273
    pub syslogprocessor_missing_year: Option<Year>,
274
}
275

276
impl SyslogProcessor {
277
    /// `SyslogProcessor` has it's own miminum requirements for `BlockSz`.
278
    ///
279
    /// Necessary for `blockzero_analysis` functions to have chance at success.
280
    #[doc(hidden)]
281
    #[cfg(any(debug_assertions, test))]
282
    pub const BLOCKSZ_MIN: BlockSz = 0x2;
283

284
    /// Maximum number of datetime patterns for matching the remainder of a syslog file.
285
    const DT_PATTERN_MAX: usize = SyslineReader::DT_PATTERN_MAX;
286

287
    /// `SyslogProcessor` has it's own miminum requirements for `BlockSz`.
288
    ///
289
    /// Necessary for `blockzero_analysis` functions to have chance at success.
290
    #[cfg(not(any(debug_assertions, test)))]
291
    pub const BLOCKSZ_MIN: BlockSz = 0x40;
292

293
    /// Minimum number of bytes needed to perform `blockzero_analysis_bytes`.
294
    ///
295
    /// Pretty sure this is smaller than the smallest possible timestamp that
296
    /// can be processed by the `DTPD!` in `DATETIME_PARSE_DATAS`.
297
    /// In other words, a file that only has a datetimestamp followed by an
298
    /// empty log message.
299
    ///
300
    /// It's okay if this is too small as the later processing stages will
301
    /// be certain of any possible datetime patterns.
302
    pub const BLOCKZERO_ANALYSIS_BYTES_MIN: BlockSz = 6;
303

304
    /// If the first number of bytes are zero bytes (NULL bytes) then
305
    /// stop processing the file. It's extremely unlikely this is a syslog
306
    /// file and more likely it's some sort of binary data file.
307
    pub const BLOCKZERO_ANALYSIS_BYTES_NULL_MAX: usize = 128;
308

309
    /// Allow "streaming stage" to drop data?
310
    /// Compile-time "option" to aid manual debugging.
311
    #[doc(hidden)]
312
    const STREAM_STAGE_DROP: bool = true;
313

314
    /// Use LRU caches in underlying components?
315
    ///
316
    /// XXX: For development and testing experiments!
317
    #[doc(hidden)]
318
    const LRU_CACHE_ENABLE: bool = true;
319

320
    /// Create a new `SyslogProcessor`.
321
    ///
322
    /// **NOTE:** should not attempt any block reads here,
323
    /// similar to other `*Readers::new()`
324
    pub fn new(
98✔
325
        path: FPath,
98✔
326
        filetype: FileType,
98✔
327
        blocksz: BlockSz,
98✔
328
        tz_offset: FixedOffset,
98✔
329
        filter_dt_after_opt: DateTimeLOpt,
98✔
330
        filter_dt_before_opt: DateTimeLOpt,
98✔
331
    ) -> Result<SyslogProcessor> {
98✔
332
        def1n!("({:?}, {:?}, {:?}, {:?})", path, filetype, blocksz, tz_offset);
98✔
333
        if blocksz < SyslogProcessor::BLOCKSZ_MIN {
98✔
334
            return Result::Err(
×
335
                Error::new(
×
336
                    ErrorKind::InvalidInput,
×
337
                    format!(
×
338
                        "BlockSz {0} (0x{0:08X}) is too small, SyslogProcessor has BlockSz minimum {1} (0x{1:08X}) file {2:?}",
×
339
                        blocksz, SyslogProcessor::BLOCKSZ_MIN, &path,
×
340
                    )
×
341
                )
×
342
            );
×
343
        }
98✔
344
        let path_ = path.clone();
98✔
345
        let mut slr = match SyslineReader::new(path, filetype, blocksz, tz_offset) {
98✔
346
            Ok(val) => val,
96✔
347
            Err(err) => {
2✔
348
                def1x!();
2✔
349
                return Result::Err(err);
2✔
350
            }
351
        };
352

353
        if !SyslogProcessor::LRU_CACHE_ENABLE {
96✔
354
            slr.LRU_cache_disable();
×
355
            slr.linereader
×
356
                .LRU_cache_disable();
×
357
            slr.linereader
×
358
                .blockreader
×
359
                .LRU_cache_disable();
×
360
        }
96✔
361

362
        def1x!("return Ok(SyslogProcessor)");
96✔
363

364
        Result::Ok(
96✔
365
            SyslogProcessor {
96✔
366
                syslinereader: slr,
96✔
367
                processingstage: ProcessingStage::Stage0ValidFileCheck,
96✔
368
                path: path_,
96✔
369
                blocksz,
96✔
370
                tz_offset,
96✔
371
                filter_dt_after_opt,
96✔
372
                filter_dt_before_opt,
96✔
373
                blockzero_analysis_done: false,
96✔
374
                drop_block_last: 0,
96✔
375
                missing_year: None,
96✔
376
                error: None,
96✔
377
            }
96✔
378
        )
96✔
379
    }
98✔
380

381
    /// `Count` of [`Line`s] processed.
382
    ///
383
    /// [`Line`s]: crate::data::line::Line
384
    #[inline(always)]
385
    #[allow(dead_code)]
386
    pub fn count_lines(&self) -> Count {
×
387
        self.syslinereader
×
388
            .linereader
×
389
            .count_lines_processed()
×
390
    }
×
391

392
    /// The value of `count_syslines_stored` of the underlying `SyslineReader`.
    /// Only compiled for tests.
    ///
    /// See [`SyslineReader::count_syslines_stored`].
    ///
    /// [`SyslineReader::count_syslines_stored`]: crate::readers::syslinereader::SyslineReader#method.count_syslines_stored
    #[cfg(test)]
    pub fn count_syslines_stored(&self) -> Count {
        self.syslinereader.count_syslines_stored()
    }
399

400
    /// See [`BlockReader::blocksz`].
401
    ///
402
    /// [`BlockReader::blocksz`]: crate::readers::blockreader::BlockReader#method.blocksz
403
    #[inline(always)]
404
    pub const fn blocksz(&self) -> BlockSz {
77✔
405
        self.syslinereader.blocksz()
77✔
406
    }
77✔
407

408
    /// See [`BlockReader::filesz`].
409
    ///
410
    /// [`BlockReader::filesz`]: crate::readers::blockreader::BlockReader#method.filesz
411
    #[inline(always)]
412
    pub const fn filesz(&self) -> FileSz {
80✔
413
        self.syslinereader.filesz()
80✔
414
    }
80✔
415

416
    /// See [`BlockReader::filetype`].
417
    ///
418
    /// [`BlockReader::filetype`]: crate::readers::blockreader::BlockReader#method.filetype
419
    #[inline(always)]
420
    pub const fn filetype(&self) -> FileType {
4✔
421
        self.syslinereader.filetype()
4✔
422
    }
4✔
423

424
    /// See [`BlockReader::path`].
425
    ///
426
    /// [`BlockReader::path`]: crate::readers::blockreader::BlockReader#method.path
427
    #[inline(always)]
428
    #[allow(dead_code)]
429
    pub const fn path(&self) -> &FPath {
4✔
430
        self.syslinereader.path()
4✔
431
    }
4✔
432

433
    /// See [`BlockReader::block_offset_at_file_offset`].
434
    ///
435
    /// [`BlockReader::block_offset_at_file_offset`]: crate::readers::blockreader::BlockReader#method.block_offset_at_file_offset
436
    #[allow(dead_code)]
437
    pub const fn block_offset_at_file_offset(
×
438
        &self,
×
439
        fileoffset: FileOffset,
×
440
    ) -> BlockOffset {
×
441
        self.syslinereader
×
442
            .block_offset_at_file_offset(fileoffset)
×
443
    }
×
444

445
    /// See [`BlockReader::file_offset_at_block_offset`].
446
    ///
447
    /// [`BlockReader::file_offset_at_block_offset`]: crate::readers::blockreader::BlockReader#method.file_offset_at_block_offset
448
    #[allow(dead_code)]
449
    pub const fn file_offset_at_block_offset(
×
450
        &self,
×
451
        blockoffset: BlockOffset,
×
452
    ) -> FileOffset {
×
453
        self.syslinereader
×
454
            .file_offset_at_block_offset(blockoffset)
×
455
    }
×
456

457
    /// See [`BlockReader::file_offset_at_block_offset_index`].
458
    ///
459
    /// [`BlockReader::file_offset_at_block_offset_index`]: crate::readers::blockreader::BlockReader#method.file_offset_at_block_offset_index
460
    #[allow(dead_code)]
461
    pub const fn file_offset_at_block_offset_index(
×
462
        &self,
×
463
        blockoffset: BlockOffset,
×
464
        blockindex: BlockIndex,
×
465
    ) -> FileOffset {
×
466
        self.syslinereader
×
467
            .file_offset_at_block_offset_index(blockoffset, blockindex)
×
468
    }
×
469

470
    /// See [`BlockReader::block_index_at_file_offset`].
471
    ///
472
    /// [`BlockReader::block_index_at_file_offset`]: crate::readers::blockreader::BlockReader#method.block_index_at_file_offset
473
    #[allow(dead_code)]
474
    pub const fn block_index_at_file_offset(
×
475
        &self,
×
476
        fileoffset: FileOffset,
×
477
    ) -> BlockIndex {
×
478
        self.syslinereader
×
479
            .block_index_at_file_offset(fileoffset)
×
480
    }
×
481

482
    /// See [`BlockReader::count_blocks`].
483
    ///
484
    /// [`BlockReader::count_blocks`]: crate::readers::blockreader::BlockReader#method.count_blocks
485
    #[allow(dead_code)]
486
    pub const fn count_blocks(&self) -> Count {
×
487
        self.syslinereader
×
488
            .count_blocks()
×
489
    }
×
490

491
    /// See [`BlockReader::blockoffset_last`].
492
    ///
493
    /// [`BlockReader::blockoffset_last`]: crate::readers::blockreader::BlockReader#method.blockoffset_last
494
    #[allow(dead_code)]
495
    pub const fn blockoffset_last(&self) -> BlockOffset {
×
496
        self.syslinereader
×
497
            .blockoffset_last()
×
498
    }
×
499

500
    /// See [`BlockReader::fileoffset_last`].
501
    ///
502
    /// [`BlockReader::fileoffset_last`]: crate::readers::blockreader::BlockReader#method.fileoffset_last
503
    pub const fn fileoffset_last(&self) -> FileOffset {
5✔
504
        self.syslinereader
5✔
505
            .fileoffset_last()
5✔
506
    }
5✔
507

508
    /// See [`LineReader::charsz`].
509
    ///
510
    /// [`LineReader::charsz`]: crate::readers::linereader::LineReader#method.charsz
511
    #[allow(dead_code)]
512
    pub const fn charsz(&self) -> usize {
5✔
513
        self.syslinereader.charsz()
5✔
514
    }
5✔
515

516
    /// See [`BlockReader::mtime`].
517
    ///
518
    /// [`BlockReader::mtime`]: crate::readers::blockreader::BlockReader#method.mtime
519
    pub fn mtime(&self) -> SystemTime {
4✔
520
        self.syslinereader.mtime()
4✔
521
    }
4✔
522

523
    /// Did this `SyslogProcessor` run `process_missing_year()` ?
524
    fn did_process_missing_year(&self) -> bool {
5✔
525
        self.missing_year.is_some()
5✔
526
    }
5✔
527

528
    /// Return `drop_data` value.
529
    pub const fn is_drop_data(&self) -> bool {
38✔
530
        self.syslinereader.is_drop_data()
38✔
531
    }
38✔
532

533
    /// store an `Error` that occurred. For later printing during `--summary`.
534
    // XXX: duplicates `FixedStructReader.set_error`
535
    fn set_error(
×
536
        &mut self,
×
537
        error: &Error,
×
538
    ) {
×
539
        def1ñ!("{:?}", error);
×
540
        let mut error_string: String = error.kind().to_string();
×
541
        error_string.push_str(": ");
×
542
        error_string.push_str(error.kind().to_string().as_str());
×
543
        // print the error but avoid printing the same error more than once
544
        // XXX: This is somewhat a hack as it's possible the same error, with the
545
        //      the same error message, could occur more than once.
546
        //      Considered another way, this function `set_error` may get called
547
        //      too often. The responsibility for calling `set_error` is haphazard.
548
        match &self.error {
×
549
            Some(err_s) => {
×
550
                if err_s != &error_string {
×
551
                    e_err!("{}", error);
×
552
                }
×
553
            }
554
            None => {
×
555
                e_err!("{}", error);
×
556
            }
×
557
        }
558
        if let Some(ref _err) = self.error {
×
559
            de_wrn!("skip overwrite of previous Error {:?} with Error ({:?})", _err, error);
×
560
            return;
×
561
        }
×
562
        self.error = Some(error_string);
×
563
    }
×
564

565
    /// Syslog files wherein the datetime format that does not include a year
566
    /// must have special handling.
567
    ///
568
    /// The last [`Sysline`] in the file is presumed to share the same year as
569
    /// the `mtime` (stored by the underlying [`BlockReader`] instance).
570
    /// The entire file is read from end to beginning (in reverse) (unless
571
    /// a `filter_dt_after_opt` is passed that coincides with the found
572
    /// syslines). The year is tracked and updated for each sysline.
573
    /// If there is jump backwards in time, that is presumed to be a
574
    /// year changeover.
575
    ///
576
    /// For example, given syslog contents
577
    ///
578
    /// ```text
579
    /// Nov 1 12:00:00 hello
580
    /// Dec 1 12:00:00 good morning
581
    /// Jan 1 12:00:00 goodbye
582
    /// ```
583
    ///
584
    /// and file `mtime` that is datetime _January 1 12:00:00 2015_,
585
    /// then the last `Sysline` "Jan 1 12:00:00 goodbye" is presumed to be in
586
    /// year 2015.
587
    /// The preceding `Sysline` "Dec 1 12:00:00 goodbye" is then processed.
588
    /// An apparent backwards jump is seen _Jan 1_ to _Dec 1_.
589
    /// From this, it can be concluded the _Dec 1_ refers to a prior year, 2014.
590
    ///
591
    /// Typically, when a datetime filter is passed, a special binary search is
592
    /// done to find the desired syslog line, reducing resource usage. Whereas,
593
    /// files processed here must be read linearly and in their entirety
594
    /// Or, if `filter_dt_after_opt` is passed then the file is read to the
595
    /// first `sysline.dt()` (datetime) that is
596
    /// `Result_Filter_DateTime1::OccursBefore` the
597
    /// `filter_dt_after_opt`.
598
    ///
599
    /// [`Sysline`]: crate::data::sysline::Sysline
600
    /// [`BlockReader`]: crate::readers::blockreader::BlockReader
601
    /// [`DateTimeL`]: crate::data::datetime::DateTimeL
602
    // BUG: does not revise year guesstimation based on encountering leap date February 29
603
    //      See Issue #245
604
    pub fn process_missing_year(
5✔
605
        &mut self,
5✔
606
        mtime: SystemTime,
5✔
607
        filter_dt_after_opt: &DateTimeLOpt,
5✔
608
    ) -> FileProcessingResultBlockZero {
5✔
609
        defn!("({:?}, {:?})", mtime, filter_dt_after_opt);
5✔
610
        debug_assert!(!self.did_process_missing_year(), "process_missing_year() must only be called once");
5✔
611
        let dt_mtime: DateTimeL = systemtime_to_datetime(&self.tz_offset, &mtime);
5✔
612
        defo!("converted dt_mtime {:?}", dt_mtime);
5✔
613
        let year: Year = dt_mtime.date_naive().year() as Year;
5✔
614
        self.missing_year = Some(year);
5✔
615
        defo!("converted missing_year {:?}", self.missing_year);
5✔
616
        let mut year_opt: Option<Year> = Some(year);
5✔
617
        defo!("year_opt {:?}", year_opt);
5✔
618
        let charsz_fo: FileOffset = self.charsz() as FileOffset;
5✔
619

620
        // The previously stored `Sysline`s have a filler year that is most likely incorrect.
621
        // The underlying `Sysline` instance cannot be updated behind an `Arc`.
622
        // Those syslines must be dropped and the entire file processed again.
623
        // However, underlying `Line` and `Block` are still valid; do not reprocess those.
624
        self.syslinereader
5✔
625
            .clear_syslines();
5✔
626

627
        // read all syslines in reverse
628
        let mut fo_prev: FileOffset = self.fileoffset_last();
5✔
629
        let mut syslinep_prev_opt: Option<SyslineP> = None;
5✔
630
        loop {
631
            let syslinep: SyslineP = match self
16✔
632
                .syslinereader
16✔
633
                .find_sysline_year(fo_prev, &year_opt)
16✔
634
            {
635
                ResultS3SyslineFind::Found((_fo, syslinep)) => {
16✔
636
                    defo!(
16✔
637
                        "Found {} Sysline @[{}, {}] datetime: {:?})",
16✔
638
                        _fo,
639
                        (*syslinep).fileoffset_begin(),
16✔
640
                        (*syslinep).fileoffset_end(),
16✔
641
                        (*syslinep).dt()
16✔
642
                    );
643
                    syslinep
16✔
644
                }
645
                ResultS3SyslineFind::Done => {
×
646
                    defo!("Done, break;");
×
647
                    break;
×
648
                }
649
                ResultS3SyslineFind::Err(err) => {
×
650
                    self.set_error(&err);
×
651
                    defx!("return FileErrIo({:?})", err);
×
652
                    return FileProcessingResultBlockZero::FileErrIoPath(err);
×
653
                }
654
            };
655
            // TODO: [2022/07/27] add fn `syslinereader.find_sysline_year_rev` to hide these char offset
656
            //       details (put them into a struct that is meant to understand these details)
657
            let fo_prev_prev: FileOffset = fo_prev;
16✔
658
            fo_prev = (*syslinep).fileoffset_begin();
16✔
659
            // check if datetime has suddenly jumped backwards.
660
            // if date has jumped backwards, then remove sysline, update the year, and process the file
661
            // from that fileoffset again
662
            match syslinep_prev_opt {
16✔
663
                Some(syslinep_prev) => {
11✔
664
                    // normally `dt_cur` should have a datetime *before or equal* to `dt_prev`
665
                    // but if not, then there was probably a year rollover
666
                    if (*syslinep).dt() > (*syslinep_prev).dt() {
11✔
667
                        let diff: Duration = *(*syslinep).dt() - *(*syslinep_prev).dt();
×
668
                        if diff > *BACKWARDS_TIME_JUMP_MEANS_NEW_YEAR {
×
669
                            year_opt = Some(year_opt.unwrap() - 1);
×
670
                            defo!("year_opt updated {:?}", year_opt);
×
671
                            self.syslinereader
×
672
                                .remove_sysline(fo_prev);
×
673
                            fo_prev = fo_prev_prev;
×
674
                            syslinep_prev_opt = Some(syslinep_prev.clone());
×
675
                            continue;
×
676
                        }
×
677
                    }
11✔
678
                }
679
                None => {}
5✔
680
            }
681
            if fo_prev < charsz_fo {
16✔
682
                defo!("fo_prev {} break;", fo_prev);
3✔
683
                // fileoffset is at the beginning of the file (or, cannot be moved back any more)
684
                break;
3✔
685
            }
13✔
686
            // if user-passed `--dt-after` and the sysline is prior to that filter then
687
            // stop processing
688
            match dt_after_or_before(syslinep.dt(), filter_dt_after_opt) {
13✔
689
                Result_Filter_DateTime1::OccursBefore => {
690
                    defo!("dt_after_or_before({:?},  {:?}) returned OccursBefore; break", syslinep.dt(), filter_dt_after_opt);
2✔
691
                    break;
2✔
692
                }
693
                Result_Filter_DateTime1::OccursAtOrAfter | Result_Filter_DateTime1::Pass => {},
11✔
694
            }
695
            // search for preceding sysline
696
            fo_prev -= charsz_fo;
11✔
697
            if fo_prev >= fo_prev_prev {
11✔
698
                // This will happen in case where the very first line of the file
699
                // holds a sysline with datetime pattern without a year, and that
700
                // sysline datetime pattern is different than all
701
                // proceeding syslines that have a year. (and it should only happen then)
702
                // Elicited by example in Issue #74
703
                de_err!("fo_prev {} ≥ {} fo_prev_prev, expected <; something is wrong", fo_prev, fo_prev_prev);
×
704
                // must break otherwise end up in an infinite loop
705
                break;
×
706
            }
11✔
707
            syslinep_prev_opt = Some(syslinep.clone());
11✔
708
        } // end loop
709
        defx!("return FileOk");
5✔
710

711
        FileProcessingResultBlockZero::FileOk
5✔
712
    }
5✔
713

714
    /// See [`SyslineReader::is_sysline_last`].
715
    ///
716
    /// [`SyslineReader::is_sysline_last`]: crate::readers::syslinereader::SyslineReader#method.is_sysline_last
717
    pub fn is_sysline_last(
×
718
        &self,
×
719
        syslinep: &SyslineP,
×
720
    ) -> bool {
×
721
        self.syslinereader
×
722
            .is_sysline_last(syslinep)
×
723
    }
×
724

725
    /// Try to `drop` data associated with the [`Block`] at [`BlockOffset`].
726
    /// This includes dropping associated [`Sysline`]s and [`Line`]s.
727
    /// This calls [`SyslineReader::drop_data`].
728
    ///
729
    /// _The caller must know what they are doing!_
730
    ///
731
    /// [`BlockOffset`]: crate::readers::blockreader::BlockOffset
732
    /// [`Sysline`]: crate::data::sysline::Sysline
733
    /// [`Line`]: crate::data::line::Line
734
    /// [`Block`]: crate::readers::blockreader::Block
735
    pub fn drop_data(
14✔
736
        &mut self,
14✔
737
        blockoffset: BlockOffset,
14✔
738
    ) -> bool {
14✔
739
        def1n!("({})", blockoffset);
14✔
740
        self.assert_stage(ProcessingStage::Stage3StreamSyslines);
14✔
741

742
        if ! self.is_drop_data() {
14✔
743
            def1x!("return false; is_drop_data() is false");
×
744
            return false;
×
745
        }
14✔
746

747
        // `syslinereader.drop_data` is an expensive function, skip if possible.
748
        if blockoffset == self.drop_block_last {
14✔
749
            def1x!("({}) skip block, return true", blockoffset);
5✔
750
            return false;
5✔
751
        }
9✔
752

753
        if self
9✔
754
            .syslinereader
9✔
755
            .drop_data(blockoffset)
9✔
756
        {
757
            self.drop_block_last = blockoffset;
4✔
758
            def1x!("({}) return true", blockoffset);
4✔
759
            return true;
4✔
760
        }
5✔
761

762
        def1x!("({}) return false", blockoffset);
5✔
763
        false
5✔
764
    }
14✔
765

766
    /// Call [`drop_data`] for the data assocaited with the [`Block`]
767
    /// *preceding* the first block of the passed [`Sysline`].
768
    ///
769
    /// _The caller must know what they are doing!_
770
    ///
771
    /// [`drop_data`]: Self#method.drop_data
772
    /// [`Block`]: crate::readers::blockreader::Block
773
    /// [`Sysline`]: crate::data::sysline::Sysline
774
    pub fn drop_data_try(
24✔
775
        &mut self,
24✔
776
        syslinep: &SyslineP,
24✔
777
    ) -> bool {
24✔
778
        if !SyslogProcessor::STREAM_STAGE_DROP {
24✔
779
            de_wrn!("drop_data_try() called but SyslogProcessor::STREAM_STAGE_DROP is false");
×
780
            return false;
×
781
        }
24✔
782
        if !self.is_drop_data() {
24✔
783
            def1ñ!("is_drop_data() is false; return false");
×
784
            return false;
×
785
        }
24✔
786

787
        let bo_first: BlockOffset = (*syslinep).blockoffset_first();
24✔
788
        if bo_first > 1 {
24✔
789
            def1ñ!();
14✔
790
            return self.drop_data(bo_first - 2);
14✔
791
        }
10✔
792

793
        false
10✔
794
    }
24✔
795

796
    /// Calls [`self.syslinereader.find_sysline(fileoffset)`],
797
    /// and in some cases calls private function `drop_block` to drop
798
    /// previously processed [`Sysline`], [`Line`], and [`Block`s].
799
    ///
800
    /// This is what implements the "streaming" in "[streaming stage]".
801
    ///
802
    /// [`self.syslinereader.find_sysline(fileoffset)`]: crate::readers::syslinereader::SyslineReader#method.find_sysline
803
    /// [`Block`s]: crate::readers::blockreader::Block
804
    /// [`Line`]: crate::data::line::Line
805
    /// [`Sysline`]: crate::data::sysline::Sysline
806
    /// [streaming stage]: crate::readers::syslogprocessor::ProcessingStage#variant.Stage3StreamSyslines
807
    pub fn find_sysline(
88✔
808
        &mut self,
88✔
809
        fileoffset: FileOffset,
88✔
810
    ) -> ResultS3SyslineFind {
88✔
811
        defn!("({})", fileoffset);
88✔
812
        let result: ResultS3SyslineFind = self
88✔
813
            .syslinereader
88✔
814
            .find_sysline(fileoffset);
88✔
815
        match result {
88✔
816
            ResultS3SyslineFind::Found(_) => {}
74✔
817
            ResultS3SyslineFind::Done => {}
14✔
818
            ResultS3SyslineFind::Err(ref err) => {
×
819
                self.set_error(err);
×
820
            }
×
821
        }
822
        defx!();
88✔
823

824
        result
88✔
825
    }
88✔
826

827
    /// Wrapper function for [`SyslineReader::find_sysline_between_datetime_filters`].
828
    /// Keeps a custom copy of any returned `Error` at `self.error`.
829
    ///
830
    /// [`SyslineReader::find_sysline_between_datetime_filters`]: crate::readers::syslinereader::SyslineReader#method.find_sysline_between_datetime_filters
831
    //
832
    // TODO: [2022/06/20] the `find` functions need consistent naming,
833
    //       `find_next`, `find_between`, `find_…` . The current design has
834
    //       the public-facing `find_` functions falling back on potential file-wide binary-search
835
    //       The binary-search only needs to be done during the stage 2. During stage 3, a simpler
836
    //       linear sequential search is more suitable, and more intuitive.
837
    //       More refactoring is in order.
838
    //       Also, a linear search can better detect rollover (i.e. when sysline datetime is missing year).
839
    // TODO: [2023/03/06] add stats tracking in `find` functions for number of
840
    //       "jumps" or bounces or fileoffset changes to confirm big-O
841
    #[inline(always)]
842
    pub fn find_sysline_between_datetime_filters(
5✔
843
        &mut self,
5✔
844
        fileoffset: FileOffset,
5✔
845
    ) -> ResultS3SyslineFind {
5✔
846
        defn!("({})", fileoffset);
5✔
847

848
        let result = match self
5✔
849
            .syslinereader
5✔
850
            .find_sysline_between_datetime_filters(
5✔
851
                fileoffset,
5✔
852
                &self.filter_dt_after_opt,
5✔
853
                &self.filter_dt_before_opt,
5✔
854
            ) {
5✔
855
            ResultS3SyslineFind::Err(err) => {
×
856
                self.set_error(&err);
×
857

858
                ResultS3SyslineFind::Err(err)
×
859
            }
860
            val => val,
5✔
861
        };
862

863
        defx!("({})", fileoffset);
5✔
864

865
        result
5✔
866
    }
5✔
867

868
    /// Wrapper function for a recurring sanity check.
869
    ///
870
    /// Good for checking functions `process_stage…` are called in
871
    /// the correct order.
872
    // XXX: is there a rust-ic way to enforce stage procession behavior
873
    //      at compile-time? It's a fairly simple enumerated type. Could a
874
    //      `match` tree (or something like that) be used?
875
    //      run-time checks of rust enum values seems hacky.
876
    #[inline(always)]
877
    fn assert_stage(
469✔
878
        &self,
469✔
879
        stage_expact: ProcessingStage,
469✔
880
    ) {
469✔
881
        debug_assert_eq!(
469✔
882
            self.processingstage, stage_expact,
883
            "Unexpected Processing Stage {:?}, expected Processing Stage {:?}",
×
884
            self.processingstage, stage_expact,
885
        );
886
    }
469✔
887

888
    /// Stage 0 does some sanity checks on the file.
889
    // TODO: this is redundant and has already been performed by functions in
890
    //       `filepreprocessor` and `BlockReader::new`.
891
    pub fn process_stage0_valid_file_check(&mut self) -> FileProcessingResultBlockZero {
80✔
892
        defn!();
80✔
893
        // sanity check calls are in correct order
894
        self.assert_stage(ProcessingStage::Stage0ValidFileCheck);
80✔
895
        self.processingstage = ProcessingStage::Stage0ValidFileCheck;
80✔
896

897
        if self.filesz() == 0 {
80✔
898
            defx!("filesz 0; return {:?}", FileProcessingResultBlockZero::FileErrEmpty);
2✔
899
            return FileProcessingResultBlockZero::FileErrEmpty;
2✔
900
        }
78✔
901
        defx!("return {:?}", FileProcessingResultBlockZero::FileOk);
78✔
902

903
        FileProcessingResultBlockZero::FileOk
78✔
904
    }
80✔
905

906
    /// Stage 1: Can [`Line`s] and [`Sysline`s] be parsed from the first block
907
    /// (block zero)?
908
    ///
909
    /// [`Sysline`s]: crate::data::sysline::Sysline
910
    /// [`Line`s]: crate::data::line::Line
911
    pub fn process_stage1_blockzero_analysis(&mut self) -> FileProcessingResultBlockZero {
77✔
912
        defn!();
77✔
913
        self.assert_stage(ProcessingStage::Stage0ValidFileCheck);
77✔
914
        self.processingstage = ProcessingStage::Stage1BlockzeroAnalysis;
77✔
915

916
        let result: FileProcessingResultBlockZero = self.blockzero_analysis();
77✔
917
        // stored syslines may be zero if a "partial" `Line` was examined
918
        // e.g. an incomplete and temporary `Line` instance was examined.
919
        defo!(
77✔
920
            "blockzero_analysis() stored syslines {}",
77✔
921
            self.syslinereader
77✔
922
                .count_syslines_stored()
77✔
923
        );
924
        match result {
77✔
925
            FileProcessingResult::FileOk => {}
39✔
926
            // skip further processing if not `FileOk`
927
            _ => {
928
                defx!("return {:?}", result);
38✔
929
                return result;
38✔
930
            }
931
        }
932

933
        defx!("return {:?}", result);
39✔
934

935
        result
39✔
936
    }
77✔
937

938
    /// Stage 2: Given the an optional datetime filter (user-passed
939
    /// `--dt-after`), can a log message with a datetime after that filter be
940
    /// found?
941
    pub fn process_stage2_find_dt(
8✔
942
        &mut self,
8✔
943
        filter_dt_after_opt: &DateTimeLOpt,
8✔
944
    ) -> FileProcessingResultBlockZero {
8✔
945
        defn!();
8✔
946
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
8✔
947
        self.processingstage = ProcessingStage::Stage2FindDt;
8✔
948

949
        // datetime formats without a year requires special handling
950
        if !self.syslinereader.dt_pattern_has_year() &&
8✔
951
            !self.syslinereader.dt_pattern_uptime()
4✔
952
        {
953
            defo!("!dt_pattern_has_year()");
4✔
954
            let mtime: SystemTime = self.mtime();
4✔
955
            match self.process_missing_year(mtime, filter_dt_after_opt) {
4✔
956
                FileProcessingResultBlockZero::FileOk => {}
4✔
957
                result => {
×
958
                    defx!("Bad result {:?}", result);
×
959
                    return result;
×
960
                }
961
            }
962
        }
4✔
963

964
        defx!();
8✔
965

966
        FileProcessingResultBlockZero::FileOk
8✔
967
    }
8✔
968

969
    /// Stage 3: during "[streaming]", processed and printed data stored by
970
    /// underlying "Readers" is proactively dropped
971
    /// (removed from process memory).
972
    ///
973
    /// Also see [`find_sysline`].
974
    ///
975
    /// [streaming]: ProcessingStage#variant.Stage3StreamSyslines
976
    /// [`find_sysline`]: self::SyslogProcessor#method.find_sysline
977
    pub fn process_stage3_stream_syslines(&mut self) -> FileProcessingResultBlockZero {
4✔
978
        defñ!();
4✔
979
        self.assert_stage(ProcessingStage::Stage2FindDt);
4✔
980
        self.processingstage = ProcessingStage::Stage3StreamSyslines;
4✔
981

982
        FileProcessingResultBlockZero::FileOk
4✔
983
    }
4✔
984

985
    /// Stage 4: no more [`Sysline`s] to process. Create and return a
986
    /// [`Summary`].
987
    ///
988
    /// [`Summary`]: crate::readers::summary::Summary
989
    /// [`Sysline`s]: crate::data::sysline::Sysline
990
    pub fn process_stage4_summary(&mut self) -> Summary {
1✔
991
        defñ!();
1✔
992
        // XXX: this can be called from various stages, no need to assert
993
        self.processingstage = ProcessingStage::Stage4Summary;
1✔
994

995
        self.summary_complete()
1✔
996
    }
1✔
997

998
    /// Review bytes in the first block ("zero block").
999
    /// If enough `Line` found then return [`FileOk`]
1000
    /// else return [`FileErrNoLinesFound`].
1001
    ///
1002
    /// [`FileOk`]: self::FileProcessingResultBlockZero
1003
    /// [`FileErrNoLinesFound`]: self::FileProcessingResultBlockZero
1004
    pub(super) fn blockzero_analysis_bytes(&mut self) -> FileProcessingResultBlockZero {
77✔
1005
        defn!();
77✔
1006
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
77✔
1007

1008
        let blockp: BlockP = match self
77✔
1009
            .syslinereader
77✔
1010
            .linereader
77✔
1011
            .blockreader
77✔
1012
            .read_block(0)
77✔
1013
        {
1014
            ResultS3ReadBlock::Found(blockp_) => blockp_,
77✔
1015
            ResultS3ReadBlock::Done => {
×
1016
                defx!("return FileErrEmpty");
×
1017
                return FileProcessingResultBlockZero::FileErrEmpty;
×
1018
            }
1019
            ResultS3ReadBlock::Err(err) => {
×
1020
                self.set_error(&err);
×
1021
                defx!("return FileErrIo({:?})", err);
×
1022
                return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1023
            }
1024
        };
1025
        // if the first block is too small then there will not be enough
1026
        // data to parse a `Line` or `Sysline`
1027
        let blocksz0: BlockSz = (*blockp).len() as BlockSz;
77✔
1028
        let require_sz: BlockSz = std::cmp::min(Self::BLOCKZERO_ANALYSIS_BYTES_MIN, self.blocksz());
77✔
1029
        defo!("blocksz0 {} < {} require_sz", blocksz0, require_sz);
77✔
1030
        if blocksz0 < require_sz {
77✔
1031
            defx!("return FileErrTooSmall");
9✔
1032
            return FileProcessingResultBlockZero::FileErrTooSmall;
9✔
1033
        }
68✔
1034
        // if the first `BLOCKZERO_ANALYSIS_BYTES_NULL_MAX` bytes are all
1035
        // zero then this is not a text file and processing should stop.
1036
        if (*blockp).iter().take(Self::BLOCKZERO_ANALYSIS_BYTES_NULL_MAX).all(|&b| b == 0) {
210✔
1037
            defx!("return FileErrNullBytes");
2✔
1038
            return FileProcessingResultBlockZero::FileErrNullBytes;
2✔
1039
        }
66✔
1040

1041
        defx!("return FileOk");
66✔
1042

1043
        FileProcessingResultBlockZero::FileOk
66✔
1044
    }
77✔
1045

1046
    /// Attempt to find a minimum number of [`Line`s] within the first block
1047
    /// (block zero).
1048
    /// If enough `Line` found then return [`FileOk`]
1049
    /// else return [`FileErrNoLinesFound`].
1050
    ///
1051
    /// [`Line`s]: crate::data::line::Line
1052
    /// [`FileOk`]: self::FileProcessingResultBlockZero
1053
    /// [`FileErrNoLinesFound`]: self::FileProcessingResultBlockZero
1054
    pub(super) fn blockzero_analysis_lines(&mut self) -> FileProcessingResultBlockZero {
66✔
1055
        defn!();
66✔
1056
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
66✔
1057

1058
        let blockp: BlockP = match self
66✔
1059
            .syslinereader
66✔
1060
            .linereader
66✔
1061
            .blockreader
66✔
1062
            .read_block(0)
66✔
1063
        {
1064
            ResultS3ReadBlock::Found(blockp_) => blockp_,
66✔
1065
            ResultS3ReadBlock::Done => {
×
1066
                defx!("return FileErrEmpty");
×
1067
                return FileProcessingResultBlockZero::FileErrEmpty;
×
1068
            }
1069
            ResultS3ReadBlock::Err(err) => {
×
1070
                self.set_error(&err);
×
1071
                defx!("return FileErrIo({:?})", err);
×
1072
                return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1073
            }
1074
        };
1075
        let blocksz0: BlockSz = (*blockp).len() as BlockSz;
66✔
1076
        let mut _partial_found = false;
66✔
1077
        let mut fo: FileOffset = 0;
66✔
1078
        // how many lines have been found?
1079
        let mut found: Count = 0;
66✔
1080
        // must find at least this many lines in block zero to be FileOk
1081
        let found_min: Count = *BLOCKZERO_ANALYSIS_LINE_COUNT_MIN_MAP
66✔
1082
            .get(&blocksz0)
66✔
1083
            .unwrap();
66✔
1084
        defx!("block zero blocksz {} found_min {}", blocksz0, found_min);
66✔
1085
        // find `found_min` Lines or whatever can be found within block 0
1086
        while found < found_min {
101✔
1087
            fo = match self
66✔
1088
                .syslinereader
66✔
1089
                .linereader
66✔
1090
                .find_line_in_block(fo)
66✔
1091
            {
1092
                (ResultS3LineFind::Found((fo_next, _linep)), _) => {
39✔
1093
                    found += 1;
39✔
1094

1095
                    fo_next
39✔
1096
                }
1097
                (ResultS3LineFind::Done, partial) => {
27✔
1098
                    match partial {
27✔
1099
                        Some(_) => {
27✔
1100
                            found += 1;
27✔
1101
                            _partial_found = true;
27✔
1102
                        },
27✔
1103
                        None => {}
×
1104
                    }
1105
                    break;
27✔
1106
                }
1107
                (ResultS3LineFind::Err(err), _) => {
×
1108
                    self.set_error(&err);
×
1109
                    defx!("return FileErrIo({:?})", err);
×
1110
                    return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1111
                }
1112
            };
1113
            if 0 != self
39✔
1114
                .syslinereader
39✔
1115
                .linereader
39✔
1116
                .block_offset_at_file_offset(fo)
39✔
1117
            {
1118
                break;
4✔
1119
            }
35✔
1120
        }
1121

1122
        let fpr: FileProcessingResultBlockZero = match found >= found_min {
66✔
1123
            true => FileProcessingResultBlockZero::FileOk,
66✔
1124
            false => FileProcessingResultBlockZero::FileErrNoLinesFound,
×
1125
        };
1126

1127
        defx!("found {} lines, partial_found {}, require {} lines, return {:?}", found, _partial_found, found_min, fpr);
66✔
1128

1129
        fpr
66✔
1130
    }
66✔
1131

1132
    /// Attempt to find a minimum number of [`Sysline`] within the first block.
1133
    /// If enough `Sysline` found then return [`FileOk`]
1134
    /// else return [`FileErrNoSyslinesFound`].
1135
    ///
1136
    /// [`Sysline`]: crate::data::sysline::Sysline
1137
    /// [`FileOk`]: self::FileProcessingResultBlockZero
1138
    /// [`FileErrNoSyslinesFound`]: self::FileProcessingResultBlockZero
1139
    pub(super) fn blockzero_analysis_syslines(&mut self) -> FileProcessingResultBlockZero {
66✔
1140
        defn!();
66✔
1141
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
66✔
1142

1143
        let blockp: BlockP = match self
66✔
1144
            .syslinereader
66✔
1145
            .linereader
66✔
1146
            .blockreader
66✔
1147
            .read_block(0)
66✔
1148
        {
1149
            ResultS3ReadBlock::Found(blockp_) => blockp_,
66✔
1150
            ResultS3ReadBlock::Done => {
×
1151
                defx!("return FileErrEmpty");
×
1152
                return FileProcessingResultBlockZero::FileErrEmpty;
×
1153
            }
1154
            ResultS3ReadBlock::Err(err) => {
×
1155
                self.set_error(&err);
×
1156
                defx!("return FileErrIo({:?})", err);
×
1157
                return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1158
            }
1159
        };
1160
        let blocksz0: BlockSz = (*blockp).len() as BlockSz;
66✔
1161
        let mut fo: FileOffset = 0;
66✔
1162
        // how many syslines have been found?
1163
        let mut found: Count = 0;
66✔
1164
        // must find at least this many syslines in block zero to be FileOk
1165
        let found_min: Count = *BLOCKZERO_ANALYSIS_SYSLINE_COUNT_MIN_MAP
66✔
1166
            .get(&blocksz0)
66✔
1167
            .unwrap();
66✔
1168
        defo!("block zero blocksz {} found_min {:?}", blocksz0, found_min);
66✔
1169

1170
        // find `at_max` Syslines within block zero
1171
        while found < found_min
97✔
1172
            && self.syslinereader.block_offset_at_file_offset(fo) == 0
66✔
1173
        {
1174
            fo = match self
66✔
1175
                .syslinereader
66✔
1176
                .find_sysline_in_block(fo)
66✔
1177
            {
1178
                (ResultS3SyslineFind::Found((fo_next, _slinep)), _) => {
31✔
1179
                    found += 1;
31✔
1180
                    defo!("Found; found {} syslines, fo_next {}", found, fo_next);
31✔
1181

1182
                    fo_next
31✔
1183
                }
1184
                (ResultS3SyslineFind::Done, partial_found) => {
35✔
1185
                    defo!("Done; found {} syslines, partial_found {}", found, partial_found);
35✔
1186
                    if partial_found {
35✔
1187
                        found += 1;
8✔
1188
                    }
27✔
1189
                    break;
35✔
1190
                }
1191
                (ResultS3SyslineFind::Err(err), _) => {
×
1192
                    self.set_error(&err);
×
1193
                    defx!("return FileErrIo({:?})", err);
×
1194
                    return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1195
                }
1196
            };
1197
        }
1198

1199
        if found == 0 {
66✔
1200
            defx!("found {} syslines, require {} syslines, return FileErrNoSyslinesFound", found, found_min);
27✔
1201
            return FileProcessingResultBlockZero::FileErrNoSyslinesFound;
27✔
1202
        }
39✔
1203

1204
        let patt_count_a = self.syslinereader.dt_patterns_counts_in_use();
39✔
1205
        defo!("dt_patterns_counts_in_use {}", patt_count_a);
39✔
1206

1207
        if !self.syslinereader.dt_patterns_analysis() {
39✔
1208
            de_err!("dt_patterns_analysis() failed which is unexpected; return FileErrNoSyslinesFound");
×
1209
            return FileProcessingResultBlockZero::FileErrNoSyslinesFound;
×
1210
        }
39✔
1211

1212
        let _patt_count_b = self.syslinereader.dt_patterns_counts_in_use();
39✔
1213
        debug_assert_eq!(
39✔
1214
            _patt_count_b,
1215
            SyslogProcessor::DT_PATTERN_MAX,
1216
            "expected patterns to be reduced to {}, found {:?}",
×
1217
            SyslogProcessor::DT_PATTERN_MAX,
1218
            _patt_count_b,
1219
        );
1220

1221
        // if more than one `DateTimeParseInstr` was used then the syslines
1222
        // must be reparsed using the one chosen `DateTimeParseInstr`
1223
        if patt_count_a > 1 {
39✔
1224
            defo!("must reprocess all syslines using limited patterns (used {} DateTimeParseInstr; must only use {})!", patt_count_a, 1);
8✔
1225

1226
            self.syslinereader.clear_syslines();
8✔
1227
            // find `at_max` Syslines within block zero
1228
            found = 0;
8✔
1229
            fo = 0;
8✔
1230
            while found < found_min
11✔
1231
                && self.syslinereader.block_offset_at_file_offset(fo) == 0
8✔
1232
            {
1233
                fo = match self
8✔
1234
                    .syslinereader
8✔
1235
                    .find_sysline_in_block(fo)
8✔
1236
                {
1237
                    (ResultS3SyslineFind::Found((fo_next, _slinep)), _) => {
3✔
1238
                        found += 1;
3✔
1239
                        defo!("Found; found {} syslines, fo_next {}", found, fo_next);
3✔
1240

1241
                        fo_next
3✔
1242
                    }
1243
                    (ResultS3SyslineFind::Done, partial_found) => {
5✔
1244
                        defo!("Done; found {} syslines, partial_found {}", found, partial_found);
5✔
1245
                        if partial_found {
5✔
1246
                            found += 1;
5✔
1247
                        }
5✔
1248
                        break;
5✔
1249
                    }
1250
                    (ResultS3SyslineFind::Err(err), _) => {
×
1251
                        self.set_error(&err);
×
1252
                        defx!("return FileErrIo({:?})", err);
×
1253
                        return FileProcessingResultBlockZero::FileErrIoPath(err);
×
1254
                    }
1255
                };
1256
            }
1257
            defo!("done reprocessing.");
8✔
1258
        } else {
1259
            defo!("no reprocess needed ({} DateTimeParseInstr)!", patt_count_a);
31✔
1260
        }
1261

1262
        let fpr: FileProcessingResultBlockZero = match found >= found_min {
39✔
1263
            true => FileProcessingResultBlockZero::FileOk,
39✔
1264
            false => FileProcessingResultBlockZero::FileErrNoSyslinesFound,
×
1265
        };
1266

1267
        // sanity check that only one `DateTimeParseInstr` is in use
1268
        if cfg!(debug_assertions) && self.syslinereader.dt_patterns_counts_in_use() != 1 {
39✔
1269
            de_wrn!(
×
1270
                "dt_patterns_counts_in_use() = {}, expected 1; for {:?}",
×
1271
                self.syslinereader.dt_patterns_counts_in_use(), self.path()
×
1272
            );
×
1273
        }
39✔
1274

1275
        if self.syslinereader.is_streamed_file()
39✔
1276
            && !self.syslinereader.dt_pattern_has_year()
×
1277
        {
1278
            self.syslinereader.linereader.blockreader.disable_drop_data();
×
1279
            debug_assert!(!self.is_drop_data(), "is_drop_data() should be false");
×
1280
        }
39✔
1281

1282
        defx!("found {} syslines, require {} syslines, return {:?}", found, found_min, fpr);
39✔
1283

1284
        fpr
39✔
1285
    }
66✔
1286

1287
    /// Call `self.blockzero_analysis_lines`.
1288
    /// If that passes then call `self.blockzero_analysis_syslines`.
1289
    pub(super) fn blockzero_analysis(&mut self) -> FileProcessingResultBlockZero {
77✔
1290
        defn!();
77✔
1291
        assert!(!self.blockzero_analysis_done, "blockzero_analysis_lines should only be completed once.");
77✔
1292
        self.blockzero_analysis_done = true;
77✔
1293
        self.assert_stage(ProcessingStage::Stage1BlockzeroAnalysis);
77✔
1294

1295
        if self.syslinereader.filesz() == 0 {
77✔
1296
            defx!("return FileErrEmpty");
×
1297
            return FileProcessingResultBlockZero::FileErrEmpty;
×
1298
        }
77✔
1299

1300
        let result: FileProcessingResultBlockZero = self.blockzero_analysis_bytes();
77✔
1301
        if !result.is_ok() {
77✔
1302
            defx!("syslinereader.blockzero_analysis_bytes() was !is_ok(), return {:?}", result);
11✔
1303
            return result;
11✔
1304
        };
66✔
1305

1306
        let result: FileProcessingResultBlockZero = self.blockzero_analysis_lines();
66✔
1307
        if !result.is_ok() {
66✔
1308
            defx!("syslinereader.blockzero_analysis() was !is_ok(), return {:?}", result);
×
1309
            return result;
×
1310
        };
66✔
1311

1312
        let result: FileProcessingResultBlockZero = self.blockzero_analysis_syslines();
66✔
1313
        defx!("return {:?}", result);
66✔
1314

1315
        result
66✔
1316
    }
77✔
1317

1318
    /// Testing helper: return a clone of the `blockreader` field
    /// `dropped_blocks`.
    #[cfg(test)]
    pub(crate) fn dropped_blocks(&self) -> SetDroppedBlocks {
        let dropped = &self.syslinereader.linereader.blockreader.dropped_blocks;

        dropped.clone()
    }
3✔
1326

1327
    /// Testing helper: return a clone of the `linereader` field
    /// `dropped_lines`.
    #[cfg(test)]
    pub(crate) fn dropped_lines(&self) -> SetDroppedLines {
        let dropped = &self.syslinereader.linereader.dropped_lines;

        dropped.clone()
    }
3✔
1334

1335
    /// Testing helper: return a clone of the `syslinereader` field
    /// `dropped_syslines`.
    #[cfg(test)]
    pub(crate) fn dropped_syslines(&self) -> SetDroppedSyslines {
        let dropped = &self.syslinereader.dropped_syslines;

        dropped.clone()
    }
3✔
1341

1342
    pub fn summary(&self) -> SummarySyslogProcessor {
7✔
1343
        let syslogprocessor_missing_year = self.missing_year;
7✔
1344

1345
        SummarySyslogProcessor {
7✔
1346
            syslogprocessor_missing_year,
7✔
1347
        }
7✔
1348
    }
7✔
1349

1350
    /// Return an up-to-date [`Summary`] instance for this `SyslogProcessor`.
1351
    ///
1352
    /// Probably not useful or interesting before
1353
    /// `ProcessingStage::Stage4Summary`.
1354
    ///
1355
    /// [`Summary`]: crate::readers::summary::Summary
1356
    pub fn summary_complete(&self) -> Summary {
4✔
1357
        let path = self.path().clone();
4✔
1358
        let path_ntf = None;
4✔
1359
        let filetype = self.filetype();
4✔
1360
        let logmessagetype = filetype.to_logmessagetype();
4✔
1361
        let summaryblockreader = self.syslinereader.linereader.blockreader.summary();
4✔
1362
        let summarylinereader = self.syslinereader.linereader.summary();
4✔
1363
        let summarysyslinereader = self.syslinereader.summary();
4✔
1364
        let summarysyslogprocessor = self.summary();
4✔
1365
        let error: Option<String> = self.error.clone();
4✔
1366

1367
        Summary::new(
4✔
1368
            path,
4✔
1369
            path_ntf,
4✔
1370
            filetype,
4✔
1371
            logmessagetype,
4✔
1372
            Some(summaryblockreader),
4✔
1373
            Some(summarylinereader),
4✔
1374
            Some(summarysyslinereader),
4✔
1375
            Some(summarysyslogprocessor),
4✔
1376
            None,
4✔
1377
            None,
4✔
1378
            None,
4✔
1379
            error,
4✔
1380
        )
1381
    }
4✔
1382
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc