• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

kaidokert / picojson-rs / 16097308979

06 Jul 2025 08:55AM UTC coverage: 93.38% (-1.2%) from 94.616%
16097308979

push

github

web-flow
Streamparser and buffering fixes (#39)

Fixes stream parser buffering and actually compacts it properly. Streaming can now be done as expected with minimum size buffers

462 of 534 new or added lines in 5 files covered. (86.52%)

9 existing lines in 1 file now uncovered.

4119 of 4411 relevant lines covered (93.38%)

532.15 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.76
/picojson/src/stream_buffer.rs
1
// SPDX-License-Identifier: Apache-2.0
2

3
use crate::ParseError;
4

5
/// Error types for StreamBuffer operations
///
/// Derives `Eq`, `Clone`, and `Copy` in addition to `Debug`/`PartialEq`:
/// every variant is plain data (at most a `&'static str`), so the error is
/// trivially copyable and can be compared for full equivalence in tests.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum StreamBufferError {
    /// Buffer is full and cannot accommodate more data
    BufferFull,
    /// Attempted to read beyond available data
    EndOfData,
    /// Invalid buffer state or operation
    InvalidState(&'static str),
}
15

16
/// Convert low-level buffer errors into the crate-level `ParseError`,
/// so StreamBuffer failures can propagate through parser APIs via `?`.
impl From<StreamBufferError> for ParseError {
    fn from(err: StreamBufferError) -> Self {
        match err {
            // A full buffer surfaces as the parser's scratch-buffer error
            StreamBufferError::BufferFull => ParseError::ScratchBufferFull,
            StreamBufferError::EndOfData => ParseError::EndOfData,
            // The static diagnostic message is carried through unchanged
            StreamBufferError::InvalidState(msg) => ParseError::UnexpectedState(msg),
        }
    }
}
25

26
/// StreamBuffer manages a single buffer for both input and escape processing
///
/// Key design principles:
/// - Reader fills unused portions of buffer directly
/// - Unescaped content is copied to buffer start when needed
/// - Zero-copy string extraction when no escapes are present
/// - Guaranteed space for escape processing (unescaped ≤ escaped)
///
/// Invariants maintained by the methods below:
/// - `tokenize_pos <= data_end` (`advance` refuses to pass the filled region)
/// - `data_end <= buffer.len()` (`mark_filled` rejects over-claims)
pub struct StreamBuffer<'a> {
    /// The entire buffer slice
    buffer: &'a mut [u8],
    /// Current position where tokenizer is reading
    tokenize_pos: usize,
    /// End of valid data from Reader (buffer[0..data_end] contains valid data)
    data_end: usize,
    /// Length of unescaped content at buffer start (0 if no unescaping active)
    unescaped_len: usize,
}
43

44
impl<'a> StreamBuffer<'a> {
45
    /// Panic-free copy_within implementation that handles overlapping ranges
    /// Based on memmove behavior but without panic machinery
    ///
    /// Copies `buffer[src_start..src_end]` to `buffer[dest..]`. Invalid or
    /// oversized requests are silently ignored or clamped instead of
    /// panicking — a deliberate trade-off to keep panic paths out of this
    /// code. Callers (e.g. `compact_from`) validate bounds before calling,
    /// so the clamping branches are normally unreachable.
    fn safe_copy_within(&mut self, src_start: usize, src_end: usize, dest: usize) {
        // saturating_sub: an inverted range (src_end < src_start) yields 0
        let count = src_end.saturating_sub(src_start);

        // Early return if nothing to copy or bounds are invalid
        if count == 0
            || src_start >= self.buffer.len()
            || src_end > self.buffer.len()
            || dest >= self.buffer.len()
        {
            return;
        }

        // Ensure dest + count doesn't exceed buffer
        let max_copy = (self.buffer.len().saturating_sub(dest)).min(count);
        if max_copy == 0 {
            return;
        }

        // Direction matters for overlapping ranges (memmove semantics):
        // iterate forward when moving data down (dest <= src) and backward
        // when moving it up, so source bytes are never overwritten before
        // they have been read.
        let iterator: &mut dyn Iterator<Item = usize> = if dest <= src_start {
            &mut (0..max_copy)
        } else {
            &mut (0..max_copy).rev()
        };

        for i in iterator {
            // get/get_mut keep this loop free of panicking index ops; both
            // lookups are in range by the checks above, so the `_` arm is a
            // no-op safety net rather than a reachable case.
            match (
                self.buffer.get(src_start.wrapping_add(i)).copied(),
                self.buffer.get_mut(dest.wrapping_add(i)),
            ) {
                (Some(src_byte), Some(dest_slot)) => {
                    *dest_slot = src_byte;
                }
                _ => {}
            }
        }
    }
83
    /// Create a new StreamBuffer with the given buffer slice
84
    pub fn new(buffer: &'a mut [u8]) -> Self {
1,046✔
85
        Self {
1,046✔
86
            buffer,
1,046✔
87
            tokenize_pos: 0,
1,046✔
88
            data_end: 0,
1,046✔
89
            unescaped_len: 0,
1,046✔
90
        }
1,046✔
91
    }
1,046✔
92

93
    /// Get the current byte at tokenize position
94
    pub fn current_byte(&self) -> Result<u8, StreamBufferError> {
17,381✔
95
        if self.tokenize_pos >= self.data_end {
17,381✔
96
            return Err(StreamBufferError::EndOfData);
1✔
97
        }
17,380✔
98
        self.buffer
17,380✔
99
            .get(self.tokenize_pos)
17,380✔
100
            .copied()
17,380✔
101
            .ok_or(StreamBufferError::EndOfData)
17,380✔
102
    }
17,381✔
103

104
    /// Advance the tokenize position by one byte
105
    pub fn advance(&mut self) -> Result<(), StreamBufferError> {
17,410✔
106
        if self.tokenize_pos >= self.data_end {
17,410✔
107
            return Err(StreamBufferError::EndOfData);
1✔
108
        }
17,409✔
109
        self.tokenize_pos = self.tokenize_pos.wrapping_add(1);
17,409✔
110
        Ok(())
17,409✔
111
    }
17,410✔
112

113
    /// Get remaining bytes available for reading
114
    pub fn remaining_bytes(&self) -> usize {
278✔
115
        self.data_end.saturating_sub(self.tokenize_pos)
278✔
116
    }
278✔
117

118
    /// Get slice for Reader to fill with new data
119
    /// Returns None if no space available
120
    pub fn get_fill_slice(&mut self) -> Option<&mut [u8]> {
7,927✔
121
        if self.data_end >= self.buffer.len() {
7,927✔
122
            return None;
1,793✔
123
        }
6,134✔
124
        self.buffer.get_mut(self.data_end..)
6,134✔
125
    }
7,927✔
126

127
    /// Compact buffer by moving unprocessed data from a given start offset to the beginning.
128
    ///
129
    /// # Arguments
130
    /// * `start_offset` - The position from which to preserve data.
131
    ///
132
    /// Returns the offset by which data was moved.
133
    pub fn compact_from(&mut self, start_offset: usize) -> Result<usize, StreamBufferError> {
1,798✔
134
        if start_offset == 0 {
1,798✔
135
            // Already at start, no compaction possible
136
            return Ok(0);
530✔
137
        }
1,268✔
138

139
        let offset = start_offset;
1,268✔
140

141
        if start_offset >= self.data_end {
1,268✔
142
            // All data has been processed, reset to start
143
            self.tokenize_pos = 0;
649✔
144
            self.data_end = 0;
649✔
145
            return Ok(offset);
649✔
146
        }
619✔
147

148
        // Move unprocessed data to start of buffer
149
        let remaining_data = self.data_end.saturating_sub(start_offset);
619✔
150

151
        // Copy existing content if there is any - EXACT same pattern as start_unescaping_with_copy
152
        if self.data_end > start_offset && start_offset < self.data_end {
619✔
153
            let span_len = remaining_data;
619✔
154

155
            // Ensure the span fits in the buffer - return error instead of silent truncation
156
            if span_len > self.buffer.len() {
619✔
NEW
157
                return Err(StreamBufferError::BufferFull);
×
158
            }
619✔
159

160
            let src_range = start_offset..start_offset.wrapping_add(span_len);
619✔
161
            if src_range.end > self.buffer.len() {
619✔
NEW
162
                return Err(StreamBufferError::InvalidState(
×
NEW
163
                    "Source range out of bounds",
×
NEW
164
                ));
×
165
            }
619✔
166

167
            // Copy within the same buffer: move data from [start_offset..end] to [0..span_len]
168
            // Use our panic-free copy implementation
169
            self.safe_copy_within(src_range.start, src_range.end, 0);
619✔
NEW
170
        }
×
171

172
        // Update positions
173
        self.tokenize_pos = self.tokenize_pos.saturating_sub(offset);
619✔
174
        self.data_end = remaining_data;
619✔
175

176
        Ok(offset)
619✔
177
    }
1,798✔
178

179
    /// Mark that Reader filled `bytes_read` bytes
180
    pub fn mark_filled(&mut self, bytes_read: usize) -> Result<(), StreamBufferError> {
6,131✔
181
        let new_data_end = self.data_end.wrapping_add(bytes_read);
6,131✔
182
        if new_data_end > self.buffer.len() {
6,131✔
183
            return Err(StreamBufferError::InvalidState(
×
184
                "Attempted to mark more bytes than buffer space",
×
185
            ));
×
186
        }
6,131✔
187
        self.data_end = new_data_end;
6,131✔
188
        Ok(())
6,131✔
189
    }
6,131✔
190

191
    /// Start unescaping and copy existing content from a range in the buffer
192
    /// This handles the common case of starting escape processing partway through a string
193
    pub fn start_unescaping_with_copy(
269✔
194
        &mut self,
269✔
195
        max_escaped_len: usize,
269✔
196
        copy_start: usize,
269✔
197
        copy_end: usize,
269✔
198
    ) -> Result<(), StreamBufferError> {
269✔
199
        // Clear any previous unescaped content
200
        self.unescaped_len = 0;
269✔
201

202
        // Ensure we have space at the start for unescaping
203
        if max_escaped_len > self.buffer.len() {
269✔
204
            return Err(StreamBufferError::BufferFull);
×
205
        }
269✔
206

207
        // Copy existing content if there is any
208
        if copy_end > copy_start && copy_start < self.data_end {
269✔
209
            let span_len = copy_end.saturating_sub(copy_start);
269✔
210

211
            // Ensure the span fits in the buffer - return error instead of silent truncation
212
            if span_len > self.buffer.len() {
269✔
213
                return Err(StreamBufferError::BufferFull);
1✔
214
            }
268✔
215

216
            let src_range = copy_start..copy_start.wrapping_add(span_len);
268✔
217
            if src_range.end > self.buffer.len() {
268✔
218
                return Err(StreamBufferError::InvalidState(
×
219
                    "Source range out of bounds",
×
220
                ));
×
221
            }
268✔
222

223
            // Copy within the same buffer: move data from [copy_start..copy_end] to [0..span_len]
224
            // Use copy_within to handle overlapping ranges safely
225
            self.buffer.copy_within(src_range, 0);
268✔
226
            self.unescaped_len = span_len;
268✔
227
        }
×
228

229
        Ok(())
268✔
230
    }
269✔
231

232
    /// Get the unescaped content slice
233
    pub fn get_unescaped_slice(&self) -> Result<&[u8], StreamBufferError> {
155✔
234
        if self.unescaped_len == 0 {
155✔
235
            return Err(StreamBufferError::InvalidState(
1✔
236
                "No unescaped content available",
1✔
237
            ));
1✔
238
        }
154✔
239
        self.buffer
154✔
240
            .get(0..self.unescaped_len)
154✔
241
            .ok_or(StreamBufferError::InvalidState(
154✔
242
                "Unescaped length exceeds buffer size",
154✔
243
            ))
154✔
244
    }
155✔
245

246
    /// Clear unescaped content (call after yielding unescaped string)
    ///
    /// Only the logical length is reset; the bytes at the buffer start are
    /// left in place and will be overwritten by the next unescaping pass.
    pub fn clear_unescaped(&mut self) {
        self.unescaped_len = 0;
    }
250

251
    /// Get current tokenize position (for string start tracking)
    ///
    /// This is an index into the shared buffer; if the buffer is compacted
    /// while a token is open, callers must adjust saved positions by the
    /// offset returned from `compact_from` (see the compaction tests).
    pub fn current_position(&self) -> usize {
        self.tokenize_pos
    }
255

256
    /// Check if buffer is empty (no more data to process)
257
    pub fn is_empty(&self) -> bool {
36,190✔
258
        self.tokenize_pos >= self.data_end
36,190✔
259
    }
36,190✔
260

261
    /// Check if we have unescaped content ready
262
    pub fn has_unescaped_content(&self) -> bool {
7,179✔
263
        self.unescaped_len > 0
7,179✔
264
    }
7,179✔
265

266
    /// Append a single byte to the unescaped content
267
    pub fn append_unescaped_byte(&mut self, byte: u8) -> Result<(), StreamBufferError> {
1,285✔
268
        if let Some(b) = self.buffer.get_mut(self.unescaped_len) {
1,285✔
269
            *b = byte;
1,284✔
270
            self.unescaped_len = self.unescaped_len.wrapping_add(1);
1,284✔
271
            Ok(())
1,284✔
272
        } else {
273
            Err(StreamBufferError::BufferFull)
1✔
274
        }
275
    }
1,285✔
276

277
    /// Get a string slice from the buffer (zero-copy)
278
    /// Used for strings without escapes
279
    pub fn get_string_slice(&self, start: usize, end: usize) -> Result<&[u8], StreamBufferError> {
750✔
280
        if start > end || end > self.data_end {
750✔
281
            return Err(StreamBufferError::InvalidState("Invalid slice bounds"));
×
282
        }
750✔
283
        self.buffer
750✔
284
            .get(start..end)
750✔
285
            .ok_or(StreamBufferError::InvalidState("Invalid slice bounds"))
750✔
286
    }
750✔
287
}
288

289
#[cfg(test)]
290
mod tests {
291
    use super::*;
292

293
    #[test]
    fn test_lifetime_expectations() {
        // This test demonstrates how StreamBuffer lifetimes should work
        // (note: it pokes private fields directly, which sibling test code may do)
        let mut buffer = [0u8; 100];
        let mut stream_buffer = StreamBuffer::new(&mut buffer);

        // Simulate some data being in the buffer
        let test_data = b"hello world";
        stream_buffer.buffer[0..test_data.len()].copy_from_slice(test_data);
        stream_buffer.data_end = test_data.len();

        // Test that we can get buffer data

        // Test unescaped content - add some unescaped data
        stream_buffer.unescaped_len = 3;
        stream_buffer.buffer[0..3].copy_from_slice(b"abc");

        let unescaped_slice = stream_buffer.get_unescaped_slice().unwrap();
        assert_eq!(unescaped_slice, b"abc");

        // The key expectation: these slices should live as long as the original buffer
        // and be usable to create String::Borrowed(&'buffer str) and String::Unescaped(&'buffer str)
    }
316

317
    #[test]
    fn test_new_stream_buffer() {
        // A fresh StreamBuffer has every position zeroed and reports empty.
        let mut storage = [0u8; 100];
        let fresh = StreamBuffer::new(&mut storage);

        assert_eq!(fresh.tokenize_pos, 0);
        assert_eq!(fresh.data_end, 0);
        assert_eq!(fresh.unescaped_len, 0);
        assert!(fresh.is_empty());
    }
327

328
    #[test]
    fn test_fill_and_advance() {
        // Round-trip: fill five bytes via the Reader API, then consume them.
        let mut buffer = [0u8; 100];
        let mut db = StreamBuffer::new(&mut buffer);

        // Fill with some data
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice[0..5].copy_from_slice(b"hello");
        }
        db.mark_filled(5).unwrap();

        assert_eq!(db.data_end, 5);
        assert_eq!(db.remaining_bytes(), 5);

        // Read bytes
        assert_eq!(db.current_byte().unwrap(), b'h');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'e');
        assert_eq!(db.remaining_bytes(), 4);
    }
349

350
    #[test]
    fn test_error_conditions() {
        // An empty StreamBuffer reports EndOfData for reads and advances,
        // and refuses to hand out an unescaped slice.
        let mut storage = [0u8; 10];
        let mut sb = StreamBuffer::new(&mut storage);

        assert_eq!(sb.current_byte(), Err(StreamBufferError::EndOfData));
        assert_eq!(sb.advance(), Err(StreamBufferError::EndOfData));
        assert!(sb.get_unescaped_slice().is_err());
    }
362

363
    #[test]
    fn test_buffer_full_scenario() {
        // Test what happens when buffer gets completely full:
        // filling stops (get_fill_slice -> None) but reading still works.
        let mut buffer = [0u8; 10];
        let mut db = StreamBuffer::new(&mut buffer);

        // Fill buffer completely
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice.copy_from_slice(b"0123456789");
        }
        db.mark_filled(10).unwrap();

        // No more space for filling
        assert!(db.get_fill_slice().is_none());

        // We can still read from buffer
        assert_eq!(db.current_byte().unwrap(), b'0');
        assert_eq!(db.remaining_bytes(), 10);
    }
383

384
    #[test]
    fn test_minimal_buffer_with_long_token() {
        // Test very small buffer with a token that doesn't fit
        let mut buffer = [0u8; 8]; // Very small buffer
        let mut db = StreamBuffer::new(&mut buffer);

        // Try to put a string that's almost as big as the buffer
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice[0..6].copy_from_slice(b"\"hello"); // Start of a long string, no closing quote
        }
        db.mark_filled(6).unwrap();

        // Advance through the data
        for _ in 0..6 {
            db.advance().unwrap();
        }

        // Now buffer is exhausted but we don't have a complete token
        assert!(db.is_empty());
        assert_eq!(db.remaining_bytes(), 0);

        // This simulates the scenario where we need more data but can't fit it
        // The parser would need to handle this by buffering the incomplete token
    }
409

410
    #[test]
    fn test_reader_returns_zero_bytes() {
        // A zero-byte fill (Reader EOF) must leave the buffer unchanged and
        // still allow a subsequent fill attempt.
        let mut buffer = [0u8; 20];
        let mut db = StreamBuffer::new(&mut buffer);

        // Simulate Reader returning 0 bytes (EOF)
        {
            let fill_slice = db.get_fill_slice().unwrap();
            assert_eq!(fill_slice.len(), 20);
            // Reader returns 0 bytes - simulating EOF or no data available
        }
        db.mark_filled(0).unwrap(); // Reader returned 0

        assert!(db.is_empty());
        assert_eq!(db.data_end, 0);
        assert_eq!(db.remaining_bytes(), 0);

        // Should still be able to get fill slice for next attempt
        let fill_slice = db.get_fill_slice().unwrap();
        assert_eq!(fill_slice.len(), 20);
    }
431

432
    #[test]
    fn test_boundary_conditions() {
        // A 3-byte buffer — too small for real JSON work — can still hold
        // and serve one tiny complete token without crashing.
        let mut storage = [0u8; 3]; // Absolute minimum
        let mut sb = StreamBuffer::new(&mut storage);

        sb.get_fill_slice().unwrap().copy_from_slice(b"\"a\"");
        sb.mark_filled(3).unwrap();

        // Read every byte back in order, advancing past each one.
        for &expected in b"\"a\"" {
            assert_eq!(sb.current_byte().unwrap(), expected);
            sb.advance().unwrap();
        }

        assert!(sb.is_empty());
    }
454

455
    #[test]
    fn test_start_unescaping_with_copy_span_too_large() {
        // Oversized copy spans must be rejected with BufferFull, not
        // silently truncated; exact-fit and smaller spans must succeed.
        let mut buffer = [0u8; 10]; // Small buffer
        let mut db = StreamBuffer::new(&mut buffer);

        // Fill buffer with some data
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice.copy_from_slice(b"0123456789");
        }
        db.mark_filled(10).unwrap();

        // Try to copy a span that's larger than the entire buffer
        let copy_start = 0;
        let copy_end = 15; // This span (15 bytes) is larger than buffer (10 bytes)
        let max_escaped_len = 5; // This is fine

        // Should return BufferFull error instead of silently truncating
        let result = db.start_unescaping_with_copy(max_escaped_len, copy_start, copy_end);
        assert_eq!(result.unwrap_err(), StreamBufferError::BufferFull);

        // Test boundary case: span exactly equals buffer size should work
        let copy_end_exact = 10; // Span of exactly 10 bytes (buffer size)
        let result = db.start_unescaping_with_copy(max_escaped_len, 0, copy_end_exact);
        assert!(result.is_ok());
        assert_eq!(db.unescaped_len, 10);

        // Test valid smaller span should work
        db.clear_unescaped();
        let result = db.start_unescaping_with_copy(max_escaped_len, 2, 6); // 4 byte span
        assert!(result.is_ok());
        assert_eq!(db.unescaped_len, 4);
        assert_eq!(db.get_unescaped_slice().unwrap(), b"2345");
    }
489

490
    #[test]
    fn test_append_unescaped_byte_uses_full_buffer() {
        // The whole buffer is usable for unescaped bytes (no reserved escape
        // headroom); only the (capacity + 1)-th append fails.
        let mut storage = [0u8; 10]; // 10 byte buffer
        let mut sb = StreamBuffer::new(&mut storage);

        for i in 0..10 {
            let appended = sb.append_unescaped_byte(b'A');
            assert!(appended.is_ok(), "Failed at byte {}", i);
        }
        assert_eq!(sb.unescaped_len, 10);

        // One more byte should fail because buffer is full
        assert_eq!(
            sb.append_unescaped_byte(b'B'),
            Err(StreamBufferError::BufferFull)
        );
    }
507

508
    #[test]
    fn test_compact_basic() {
        // Compacting from the tokenize position slides the unread tail to
        // the buffer start and rebases both positions.
        let mut buffer = [0u8; 10];
        let mut db = StreamBuffer::new(&mut buffer);

        // Fill buffer with data: "0123456789"
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice.copy_from_slice(b"0123456789");
        }
        db.mark_filled(10).unwrap();

        // Process some data (advance tokenize_pos to position 4)
        for _ in 0..4 {
            db.advance().unwrap();
        }

        // Before compact: tokenize_pos=4, data_end=10, remaining="456789"
        assert_eq!(db.tokenize_pos, 4);
        assert_eq!(db.data_end, 10);
        assert_eq!(db.remaining_bytes(), 6);

        // Compact the buffer
        let offset = db.compact_from(4).unwrap();
        assert_eq!(offset, 4); // Data was moved by 4 positions

        // After compact: tokenize_pos=0, data_end=6, buffer starts with "456789"
        assert_eq!(db.tokenize_pos, 0);
        assert_eq!(db.data_end, 6);
        assert_eq!(db.remaining_bytes(), 6);

        // Verify the data was moved correctly
        assert_eq!(db.current_byte().unwrap(), b'4');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'5');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'6');
    }
546

547
    #[test]
    fn test_compact_from_preserves_number() {
        // Compacting from a point BEFORE tokenize_pos (e.g. the start of a
        // partially-read number) keeps those earlier bytes and rebases
        // tokenize_pos relative to them.
        let mut buffer = [0u8; 10];
        let mut db = StreamBuffer::new(&mut buffer);
        db.buffer.copy_from_slice(b"0123456789");
        db.data_end = 10;
        db.tokenize_pos = 5;
        let number_start_pos = 3;

        let offset = db.compact_from(number_start_pos).unwrap();
        assert_eq!(offset, 3);
        assert_eq!(db.tokenize_pos, 2); // 5 - 3
        assert_eq!(db.data_end, 7); // 10 - 3
        assert_eq!(&db.buffer[..db.data_end], b"3456789");
    }
562

563
    #[test]
    fn test_compact_no_op_when_at_start() {
        // compact_from(0) must be a no-op and report a zero offset.
        let mut buffer = [0u8; 10];
        let mut db = StreamBuffer::new(&mut buffer);

        // Fill buffer with data
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice[0..5].copy_from_slice(b"hello");
        }
        db.mark_filled(5).unwrap();

        // Don't advance tokenize_pos (stays at 0)
        assert_eq!(db.tokenize_pos, 0);
        assert_eq!(db.data_end, 5);

        // Compact should be no-op
        let offset = db.compact_from(0).unwrap();
        assert_eq!(offset, 0); // No movement occurred

        // Should be unchanged
        assert_eq!(db.tokenize_pos, 0);
        assert_eq!(db.data_end, 5);
        assert_eq!(db.current_byte().unwrap(), b'h');
    }
588

589
    #[test]
    fn test_compact_all_data_processed() {
        // When everything up to data_end is consumed, compaction resets the
        // buffer to its empty state instead of copying anything.
        let mut buffer = [0u8; 10];
        let mut db = StreamBuffer::new(&mut buffer);

        // Fill buffer with data
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice[0..5].copy_from_slice(b"hello");
        }
        db.mark_filled(5).unwrap();

        // Process all data
        for _ in 0..5 {
            db.advance().unwrap();
        }

        // All data processed
        assert_eq!(db.tokenize_pos, 5);
        assert_eq!(db.data_end, 5);
        assert!(db.is_empty());

        // Compact should reset to start
        let offset = db.compact_from(5).unwrap();
        assert_eq!(offset, 5); // All data was processed, moved by 5

        // Should be reset to empty state
        assert_eq!(db.tokenize_pos, 0);
        assert_eq!(db.data_end, 0);
        assert!(db.is_empty());
    }
620

621
    #[test]
    fn test_compact_enables_new_data_fill() {
        // After a full buffer is half-consumed, compaction frees space so a
        // second Reader fill can append; old tail + new data read contiguously.
        let mut buffer = [0u8; 10];
        let mut db = StreamBuffer::new(&mut buffer);

        // Fill buffer completely
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice.copy_from_slice(b"0123456789");
        }
        db.mark_filled(10).unwrap();

        // Process half the data
        for _ in 0..5 {
            db.advance().unwrap();
        }

        // Buffer is full, can't get fill slice
        assert!(db.get_fill_slice().is_none());

        // Compact to make space
        let offset = db.compact_from(5).unwrap();
        assert_eq!(offset, 5); // Data moved by 5 positions

        // Now should be able to get fill slice again
        let fill_slice = db.get_fill_slice().unwrap();
        assert_eq!(fill_slice.len(), 5); // 5 bytes available (10 - 5 remaining)

        // Fill with new data
        fill_slice[0..5].copy_from_slice(b"ABCDE");
        db.mark_filled(5).unwrap();

        // Verify combined data: "56789ABCDE"
        assert_eq!(db.data_end, 10);
        assert_eq!(db.current_byte().unwrap(), b'5');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'6');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'7');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'8');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'9');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'A');
    }
667

668
    #[test]
    fn test_compact_with_single_byte_remaining() {
        // Compacting with only one unread byte left moves that byte to the
        // buffer start and frees the rest for filling.
        let mut buffer = [0u8; 5];
        let mut db = StreamBuffer::new(&mut buffer);

        // Fill buffer: "abcde"
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice.copy_from_slice(b"abcde");
        }
        db.mark_filled(5).unwrap();

        // Process almost all data (leave one byte)
        for _ in 0..4 {
            db.advance().unwrap();
        }

        // One byte remaining
        assert_eq!(db.remaining_bytes(), 1);
        assert_eq!(db.current_byte().unwrap(), b'e');

        // Compact
        let offset = db.compact_from(4).unwrap();
        assert_eq!(offset, 4); // Moved by 4 positions

        // Should have moved the last byte to start
        assert_eq!(db.tokenize_pos, 0);
        assert_eq!(db.data_end, 1);
        assert_eq!(db.current_byte().unwrap(), b'e');
        assert_eq!(db.remaining_bytes(), 1);

        // Should have space for 4 more bytes
        let fill_slice = db.get_fill_slice().unwrap();
        assert_eq!(fill_slice.len(), 4);
    }
703

704
    #[test]
    fn test_compact_buffer_wall_scenario() {
        // Simulate hitting the buffer wall during token processing
        // This tests the "always compact when buffer full" strategy

        let mut buffer = [0u8; 10];
        let mut db = StreamBuffer::new(&mut buffer);

        // Fill buffer completely with: `{"hello_wo` (10 bytes, fills buffer exactly)
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice.copy_from_slice(b"{\"hello_wo");
        }
        db.mark_filled(10).unwrap();

        // Process tokens: { " h e l l o _ w o
        // Parser is in State::String(2) tracking string start at position 2
        let mut _string_start_pos = 2; // Parser's state: string started at pos 2

        // Advance to simulate tokenizer processing
        for _ in 0..10 {
            db.advance().unwrap();
        }

        // Buffer is now empty, we hit the wall
        assert!(db.is_empty());
        assert!(db.get_fill_slice().is_none()); // No space to read more

        // ALWAYS compact when hitting buffer wall
        let offset = db.compact_from(10).unwrap();
        assert_eq!(offset, 10); // Moved by 10 positions (everything was processed)

        // Parser updates state: string_start_pos = 2 - 10 = -8
        // Since string_start_pos < 0, the original string start was discarded!
        // Parser must now switch to escape/copy mode for the continuation
        if _string_start_pos < offset {
            // Original string start was discarded - must use escape/copy mode
            // In real implementation, parser would copy what it had processed to unescaped buffer
            println!("String start was discarded, switching to escape mode");
            _string_start_pos = 0; // Reset for escape mode
        } else {
            _string_start_pos = _string_start_pos.saturating_sub(offset); // Normal position update
        }

        // After compaction, buffer is reset and ready for new data
        assert_eq!(db.tokenize_pos, 0);
        assert_eq!(db.data_end, 0);

        // Now we can read more data
        {
            let fill_slice = db.get_fill_slice().unwrap();
            assert_eq!(fill_slice.len(), 10); // Full buffer available
            fill_slice[0..3].copy_from_slice(b"rld");
        }
        db.mark_filled(3).unwrap();

        // Continue processing the string continuation
        assert_eq!(db.current_byte().unwrap(), b'r');
        assert_eq!(db.remaining_bytes(), 3);
    }
764

765
    #[test]
    fn test_compact_saves_partial_token() {
        // Test case where compaction saves partial token at end of buffer
        let mut buffer = [0u8; 8];
        let mut db = StreamBuffer::new(&mut buffer);

        // Fill buffer: {"hel|lo"} where we process up to 'l' and hit wall with "lo\"}" remaining
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice.copy_from_slice(b"{\"hello\"");
        }
        db.mark_filled(8).unwrap();

        // Process: { " h e l - stop here with "lo\"" remaining
        for _ in 0..5 {
            db.advance().unwrap();
        }

        // Current state: parser at position 5, with "lo\"" remaining (3 bytes)
        let mut _string_start_pos = 2; // Parser state: string started at position 2
        assert_eq!(db.current_byte().unwrap(), b'l');
        assert_eq!(db.remaining_bytes(), 3);

        // Hit buffer wall, compact
        let offset = db.compact_from(5).unwrap();
        assert_eq!(offset, 5); // Moved data by 5 positions

        // Update parser state
        _string_start_pos = if _string_start_pos < offset {
            0 // Switch to escape mode - original start was discarded
        } else {
            _string_start_pos - offset // Normal position update: 2 - 5 = -3, so switch to escape mode
        };

        // After compaction: "lo\"" is now at start of buffer
        assert_eq!(db.tokenize_pos, 0);
        assert_eq!(db.data_end, 3);
        assert_eq!(db.current_byte().unwrap(), b'l');
        assert_eq!(db.remaining_bytes(), 3);

        // We saved 3 bytes, gained 5 bytes of space
        let fill_slice = db.get_fill_slice().unwrap();
        assert_eq!(fill_slice.len(), 5);
    }
809

810
    #[test]
    fn test_position_update_after_compaction_normal_case() {
        // Positions at or beyond the compaction offset survive: they are
        // simply shifted left by the number of discarded bytes.

        // Case 1: String position preserved after compaction
        let _state = crate::shared::State::String(10);
        let offset = 5;

        // Simulate the parser's position-update rule for a string start at 10.
        let string_pos = 10;
        let updated_pos = if string_pos < offset {
            0 // Would need escape mode
        } else {
            string_pos - offset // Normal position update: 10 - 5 = 5
        };
        assert_eq!(updated_pos, 5);

        // Case 2: Key position preserved after compaction
        let key_pos = 8;
        let offset = 3;

        let updated_key_pos = if key_pos < offset {
            0 // Would need escape mode
        } else {
            key_pos - offset // Normal position update: 8 - 3 = 5
        };
        assert_eq!(updated_key_pos, 5);

        // Case 3: Number position preserved after compaction. A number must
        // not lose its start position, so the discarded case is a hard error.
        let number_pos = 15;
        let offset = 7;

        let updated_number_pos = if number_pos < offset {
            panic!("Number position discarded - buffer too small");
        } else {
            number_pos - offset // Normal position update: 15 - 7 = 8
        };
        assert_eq!(updated_number_pos, 8);
    }
852

853
    #[test]
    fn test_position_update_after_compaction_escape_mode_case() {
        // When a token's start lies before the compaction offset, the bytes it
        // pointed at were discarded: strings and keys fall back to escape/copy
        // mode, while numbers must treat this as an error.

        // Case 1: String position discarded - needs escape mode
        let string_pos: usize = 3;
        let offset = 7; // Offset is larger than the string position

        assert!(string_pos < offset); // escape mode is required

        // saturating_sub yields 0 exactly when the start was discarded,
        // matching the "reset for escape mode" rule.
        let updated_string_pos = string_pos.saturating_sub(offset);
        assert_eq!(updated_string_pos, 0);

        // Case 2: Key position discarded - needs escape mode
        let key_pos: usize = 2;
        let offset = 8;

        assert!(key_pos < offset); // escape mode is required

        let updated_key_pos = key_pos.saturating_sub(offset);
        assert_eq!(updated_key_pos, 0);

        // Case 3: Number position discarded - should be an error
        let number_pos = 1;
        let offset = 5;

        let should_error = number_pos < offset;
        assert!(should_error); // Numbers spanning compaction boundaries should error
    }
894

895
    #[test]
    fn test_position_update_boundary_conditions() {
        // Exact boundary behavior of the `pos < offset` escape-mode check.

        // Helper mirroring the parser's discard test.
        let needs_escape_mode = |pos: usize, offset: usize| pos < offset;

        // Case 1: position exactly equals offset — survives, lands at index 0.
        let (pos, offset) = (5, 5);
        assert!(!needs_escape_mode(pos, offset));
        assert_eq!(pos - offset, 0);

        // Case 2: position one less than offset — discarded, escape mode needed.
        let (pos, offset) = (4, 5);
        assert!(needs_escape_mode(pos, offset));

        // Case 3: position one more than offset — survives, lands at index 1.
        let (pos, offset) = (6, 5);
        assert!(!needs_escape_mode(pos, offset));
        assert_eq!(pos - offset, 1);

        // Case 4: zero offset (no compaction occurred) — position unchanged.
        let (pos, offset) = (10, 0);
        assert!(!needs_escape_mode(pos, offset));
        assert_eq!(pos - offset, 10);
    }
936

937
    #[test]
    fn test_position_update_state_transitions() {
        // Exercise the compaction position-update rule for each parser state.
        use crate::shared::State;

        // Case 1: State::None carries no position, so there is nothing to do.
        let state = State::None;
        match state {
            State::None => {
                // No action needed - test passes
            }
            _ => panic!("Expected State::None"),
        }

        // Case 2: String position (12) ahead of offset (8) — shifted left to 4.
        let offset = 8;
        let mut string_state = State::String(12);
        if let State::String(pos) = &mut string_state {
            *pos = if *pos < offset {
                0 // would need escape mode
            } else {
                pos.saturating_sub(offset)
            };
        } else {
            panic!("Expected State::String");
        }
        match string_state {
            State::String(pos) => assert_eq!(pos, 4),
            _ => panic!("Expected State::String"),
        }

        // Case 3: Key position (3) behind offset (10) — reset for escape mode.
        let offset = 10;
        let mut key_state = State::Key(3);
        if let State::Key(pos) = &mut key_state {
            *pos = if *pos < offset {
                0 // needs escape mode
            } else {
                pos.saturating_sub(offset)
            };
        } else {
            panic!("Expected State::Key");
        }
        match key_state {
            State::Key(pos) => assert_eq!(pos, 0), // Reset for escape mode
            _ => panic!("Expected State::Key"),
        }

        // Case 4: Number position (20) ahead of offset (6) — shifted left to 14.
        // A number start behind the offset would be unrecoverable.
        let offset = 6;
        let mut number_state = State::Number(20);
        if let State::Number(pos) = &mut number_state {
            if *pos < offset {
                panic!("Number position discarded - buffer too small");
            }
            *pos = pos.saturating_sub(offset);
        } else {
            panic!("Expected State::Number");
        }
        match number_state {
            State::Number(pos) => assert_eq!(pos, 14),
            _ => panic!("Expected State::Number"),
        }
    }
1017
}
1018

1019
impl crate::number_parser::NumberExtractor for StreamBuffer<'_> {
1020
    fn get_number_slice(
227✔
1021
        &self,
227✔
1022
        start: usize,
227✔
1023
        end: usize,
227✔
1024
    ) -> Result<&[u8], crate::shared::ParseError> {
227✔
1025
        self.get_string_slice(start, end)
227✔
1026
            .map_err(|_| crate::shared::ParseError::UnexpectedState("Invalid number slice bounds"))
227✔
1027
    }
227✔
1028

1029
    fn current_position(&self) -> usize {
227✔
1030
        self.tokenize_pos
227✔
1031
    }
227✔
1032

1033
    fn is_empty(&self) -> bool {
21✔
1034
        self.tokenize_pos >= self.data_end
21✔
1035
    }
21✔
1036
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc