• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

kaidokert / picojson-rs / 16028404404

02 Jul 2025 02:48PM UTC coverage: 94.592% (+0.1%) from 94.482%
16028404404

Pull #20

github

kaidokert
Experimental size optimizations
Pull Request #20: Experimental const driven size optimizations

105 of 108 new or added lines in 2 files covered. (97.22%)

5 existing lines in 2 files now uncovered.

2064 of 2182 relevant lines covered (94.59%)

138.64 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.59
/picojson/src/direct_buffer.rs
1
// SPDX-License-Identifier: Apache-2.0
2

3
use crate::ParseError;
4

5
/// Error types for DirectBuffer operations
///
/// The enum carries no owned data (unit variants plus a `&'static str`), so it is
/// cheap to copy and supports full equality comparison.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DirectBufferError {
    /// Buffer is full and cannot accommodate more data
    BufferFull,
    /// Attempted to read beyond available data
    EndOfData,
    /// Invalid buffer state or operation; the payload is a static description of the problem
    InvalidState(&'static str),
}
15

16
impl From<DirectBufferError> for ParseError {
17
    fn from(err: DirectBufferError) -> Self {
×
18
        match err {
×
19
            DirectBufferError::BufferFull => ParseError::ScratchBufferFull,
×
20
            DirectBufferError::EndOfData => ParseError::EndOfData,
×
21
            DirectBufferError::InvalidState(msg) => ParseError::UnexpectedState(msg),
×
22
        }
23
    }
×
24
}
25

26
/// DirectBuffer manages a single buffer for both input and escape processing
///
/// Key design principles:
/// - Reader fills unused portions of buffer directly
/// - Unescaped content is copied to buffer start when needed
/// - Zero-copy string extraction when no escapes are present
/// - Guaranteed space for escape processing (unescaped ≤ escaped)
#[derive(Debug)]
pub struct DirectBuffer<'a> {
    /// The entire buffer slice
    buffer: &'a mut [u8],
    /// Current position where tokenizer is reading
    tokenize_pos: usize,
    /// End of valid data from Reader (`buffer[0..data_end]` contains valid data)
    data_end: usize,
    /// Length of unescaped content at buffer start (0 if no unescaping active)
    unescaped_len: usize,
    /// Minimum space to reserve for escape processing
    escape_reserve: usize,
}
45

46
impl<'a> DirectBuffer<'a> {
47
    /// Create a new DirectBuffer with the given buffer slice
48
    pub fn new(buffer: &'a mut [u8]) -> Self {
53✔
49
        // Reserve 10% of buffer for escape processing, minimum 64 bytes
50
        let escape_reserve = (buffer.len() / 10).max(64);
53✔
51

52
        Self {
53✔
53
            buffer,
53✔
54
            tokenize_pos: 0,
53✔
55
            data_end: 0,
53✔
56
            unescaped_len: 0,
53✔
57
            escape_reserve,
53✔
58
        }
53✔
59
    }
53✔
60

61
    /// Get the current byte at tokenize position
62
    pub fn current_byte(&self) -> Result<u8, DirectBufferError> {
597✔
63
        if self.tokenize_pos >= self.data_end {
597✔
64
            return Err(DirectBufferError::EndOfData);
1✔
65
        }
596✔
66
        Ok(self.buffer[self.tokenize_pos])
596✔
67
    }
597✔
68

69
    /// Advance the tokenize position by one byte
70
    pub fn advance(&mut self) -> Result<(), DirectBufferError> {
601✔
71
        if self.tokenize_pos >= self.data_end {
601✔
72
            return Err(DirectBufferError::EndOfData);
1✔
73
        }
600✔
74
        self.tokenize_pos += 1;
600✔
75
        Ok(())
600✔
76
    }
601✔
77

78
    /// Get remaining bytes available for reading
79
    pub fn remaining_bytes(&self) -> usize {
10✔
80
        self.data_end.saturating_sub(self.tokenize_pos)
10✔
81
    }
10✔
82

83
    /// Get slice for Reader to fill with new data
84
    /// Returns None if no space available
85
    pub fn get_fill_slice(&mut self) -> Option<&mut [u8]> {
629✔
86
        if self.data_end >= self.buffer.len() {
629✔
87
            return None;
1✔
88
        }
628✔
89
        Some(&mut self.buffer[self.data_end..])
628✔
90
    }
629✔
91

92
    /// Mark that Reader filled `bytes_read` bytes
93
    pub fn mark_filled(&mut self, bytes_read: usize) -> Result<(), DirectBufferError> {
627✔
94
        if self.data_end + bytes_read > self.buffer.len() {
627✔
UNCOV
95
            return Err(DirectBufferError::InvalidState(
×
96
                "Attempted to mark more bytes than buffer space",
×
97
            ));
×
98
        }
627✔
99
        self.data_end += bytes_read;
627✔
100
        Ok(())
627✔
101
    }
627✔
102

103
    /// Start unescaping and copy existing content from a range in the buffer
104
    /// This handles the common case of starting escape processing partway through a string
105
    pub fn start_unescaping_with_copy(
8✔
106
        &mut self,
8✔
107
        max_escaped_len: usize,
8✔
108
        copy_start: usize,
8✔
109
        copy_end: usize,
8✔
110
    ) -> Result<(), DirectBufferError> {
8✔
111
        // Clear any previous unescaped content
112
        self.unescaped_len = 0;
8✔
113

114
        // Ensure we have space at the start for unescaping
115
        if max_escaped_len > self.buffer.len() {
8✔
UNCOV
116
            return Err(DirectBufferError::BufferFull);
×
117
        }
8✔
118

119
        // Copy existing content if there is any
120
        if copy_end > copy_start && copy_start < self.data_end {
8✔
121
            let span_len = copy_end - copy_start;
8✔
122

123
            // Ensure the span fits in the buffer - return error instead of silent truncation
124
            if span_len > self.buffer.len() {
8✔
125
                return Err(DirectBufferError::BufferFull);
1✔
126
            }
7✔
127

128
            // Copy within the same buffer: move data from [copy_start..copy_end] to [0..span_len]
129
            // Use copy_within to handle overlapping ranges safely
130
            self.buffer
7✔
131
                .copy_within(copy_start..copy_start + span_len, 0);
7✔
132
            self.unescaped_len = span_len;
7✔
UNCOV
133
        }
×
134

135
        Ok(())
7✔
136
    }
8✔
137

138
    /// Get the unescaped content slice
139
    pub fn get_unescaped_slice(&self) -> Result<&[u8], DirectBufferError> {
8✔
140
        if self.unescaped_len == 0 {
8✔
141
            return Err(DirectBufferError::InvalidState(
1✔
142
                "No unescaped content available",
1✔
143
            ));
1✔
144
        }
7✔
145
        Ok(&self.buffer[0..self.unescaped_len])
7✔
146
    }
8✔
147

148
    /// Clear unescaped content (call after yielding unescaped string)
149
    pub fn clear_unescaped(&mut self) {
3✔
150
        self.unescaped_len = 0;
3✔
151
    }
3✔
152

153
    /// Get current tokenize position (for string start tracking)
154
    pub fn current_position(&self) -> usize {
106✔
155
        self.tokenize_pos
106✔
156
    }
106✔
157

158
    /// Check if buffer is empty (no more data to process)
159
    pub fn is_empty(&self) -> bool {
625✔
160
        self.tokenize_pos >= self.data_end
625✔
161
    }
625✔
162

163
    /// Check if we have unescaped content ready
164
    pub fn has_unescaped_content(&self) -> bool {
212✔
165
        self.unescaped_len > 0
212✔
166
    }
212✔
167

168
    /// Append a single byte to the unescaped content
169
    pub fn append_unescaped_byte(&mut self, byte: u8) -> Result<(), DirectBufferError> {
83✔
170
        let available_space = self.buffer.len().saturating_sub(self.escape_reserve);
83✔
171
        if self.unescaped_len >= available_space {
83✔
172
            return Err(DirectBufferError::BufferFull);
3✔
173
        }
80✔
174

175
        self.buffer[self.unescaped_len] = byte;
80✔
176
        self.unescaped_len += 1;
80✔
177
        Ok(())
80✔
178
    }
83✔
179

180
    /// Get a string slice from the buffer (zero-copy)
181
    /// Used for strings without escapes
182
    pub fn get_string_slice(&self, start: usize, end: usize) -> Result<&[u8], DirectBufferError> {
68✔
183
        if start > end || end > self.data_end {
68✔
UNCOV
184
            return Err(DirectBufferError::InvalidState("Invalid slice bounds"));
×
185
        }
68✔
186
        Ok(&self.buffer[start..end])
68✔
187
    }
68✔
188
}
189

190
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lifetime_expectations() {
        // Illustrates how slices handed out by DirectBuffer relate to the backing storage
        let mut storage = [0u8; 100];
        let mut db = DirectBuffer::new(&mut storage);

        // Pretend a reader already deposited some input
        let payload = b"hello world";
        db.buffer[..payload.len()].copy_from_slice(payload);
        db.data_end = payload.len();

        // Pretend three unescaped bytes were produced at the start of the buffer
        db.buffer[..3].copy_from_slice(b"abc");
        db.unescaped_len = 3;

        assert_eq!(db.get_unescaped_slice().unwrap(), b"abc");

        // The key expectation: these slices should live as long as the original buffer
        // and be usable to create String::Borrowed(&'buffer str) and String::Unescaped(&'buffer str)
    }

    #[test]
    fn test_new_direct_buffer() {
        let mut storage = [0u8; 100];
        let fresh = DirectBuffer::new(&mut storage);

        assert_eq!(fresh.tokenize_pos, 0);
        assert_eq!(fresh.data_end, 0);
        assert_eq!(fresh.unescaped_len, 0);
        // 10% of 100 is below the 64-byte floor
        assert_eq!(fresh.escape_reserve, 64);
        assert!(fresh.is_empty());
    }

    #[test]
    fn test_fill_and_advance() {
        let mut storage = [0u8; 100];
        let mut db = DirectBuffer::new(&mut storage);

        // Deposit five bytes and announce them
        db.get_fill_slice().unwrap()[0..5].copy_from_slice(b"hello");
        db.mark_filled(5).unwrap();

        assert_eq!(db.data_end, 5);
        assert_eq!(db.remaining_bytes(), 5);

        // Consume one byte and look at the next
        assert_eq!(db.current_byte().unwrap(), b'h');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'e');
        assert_eq!(db.remaining_bytes(), 4);
    }

    #[test]
    fn test_error_conditions() {
        let mut storage = [0u8; 10];
        let mut db = DirectBuffer::new(&mut storage);

        // Nothing was filled, so reading and advancing both hit the end of data
        assert_eq!(db.current_byte(), Err(DirectBufferError::EndOfData));
        assert_eq!(db.advance(), Err(DirectBufferError::EndOfData));

        // Nothing was unescaped either
        assert!(db.get_unescaped_slice().is_err());
    }

    #[test]
    fn test_buffer_full_scenario() {
        // Behavior once every byte of the buffer holds data
        let mut storage = [0u8; 10];
        let mut db = DirectBuffer::new(&mut storage);

        db.get_fill_slice().unwrap().copy_from_slice(b"0123456789");
        db.mark_filled(10).unwrap();

        // The reader has nowhere left to write
        assert!(db.get_fill_slice().is_none());

        // The tokenizer can still consume what is there
        assert_eq!(db.current_byte().unwrap(), b'0');
        assert_eq!(db.remaining_bytes(), 10);
    }

    #[test]
    fn test_minimal_buffer_with_long_token() {
        // A tiny buffer holding the beginning of a token that never completes
        let mut storage = [0u8; 8];
        let mut db = DirectBuffer::new(&mut storage);

        // Opening quote plus text, no closing quote
        db.get_fill_slice().unwrap()[0..6].copy_from_slice(b"\"hello");
        db.mark_filled(6).unwrap();

        // Walk over everything that was filled
        (0..6).for_each(|_| db.advance().unwrap());

        // All data consumed, yet the token is still incomplete
        assert!(db.is_empty());
        assert_eq!(db.remaining_bytes(), 0);

        // This simulates the scenario where we need more data but can't fit it
        // The parser would need to handle this by buffering the incomplete token
    }

    #[test]
    fn test_reader_returns_zero_bytes() {
        let mut storage = [0u8; 20];
        let mut db = DirectBuffer::new(&mut storage);

        // The whole buffer is offered to the reader, which then delivers nothing (EOF)
        assert_eq!(db.get_fill_slice().unwrap().len(), 20);
        db.mark_filled(0).unwrap();

        assert!(db.is_empty());
        assert_eq!(db.data_end, 0);
        assert_eq!(db.remaining_bytes(), 0);

        // A subsequent attempt is offered the same full slice again
        assert_eq!(db.get_fill_slice().unwrap().len(), 20);
    }

    #[test]
    fn test_maximum_escape_reserve_scenario() {
        // 100 / 10 = 10, lifted to the 64-byte minimum
        let mut medium = [0u8; 100];
        assert_eq!(DirectBuffer::new(&mut medium).escape_reserve, 64);

        // Smaller buffer: the minimum still applies
        let mut small = [0u8; 50];
        assert_eq!(DirectBuffer::new(&mut small).escape_reserve, 64);

        // Larger buffer: 1000 / 10 = 100 exceeds the minimum
        let mut large = [0u8; 1000];
        assert_eq!(DirectBuffer::new(&mut large).escape_reserve, 100);
    }

    #[test]
    fn test_boundary_conditions() {
        // Three bytes: too small for real JSON work, but nothing may crash
        let mut storage = [0u8; 3];
        let mut db = DirectBuffer::new(&mut storage);

        db.get_fill_slice().unwrap().copy_from_slice(b"\"a\"");
        db.mark_filled(3).unwrap();

        // Every byte can be observed and stepped over in order
        for &expected in b"\"a\"" {
            assert_eq!(db.current_byte().unwrap(), expected);
            db.advance().unwrap();
        }

        assert!(db.is_empty());
    }

    #[test]
    fn test_start_unescaping_with_copy_span_too_large() {
        let mut storage = [0u8; 10];
        let mut db = DirectBuffer::new(&mut storage);

        // Occupy the whole buffer
        db.get_fill_slice().unwrap().copy_from_slice(b"0123456789");
        db.mark_filled(10).unwrap();

        let max_escaped_len = 5; // well within the buffer

        // A 15-byte span cannot fit into a 10-byte buffer: expect BufferFull, not truncation
        let oversized = db.start_unescaping_with_copy(max_escaped_len, 0, 15);
        assert_eq!(oversized, Err(DirectBufferError::BufferFull));

        // A span exactly as long as the buffer is accepted
        let exact = db.start_unescaping_with_copy(max_escaped_len, 0, 10);
        assert!(exact.is_ok());
        assert_eq!(db.unescaped_len, 10);

        // A shorter span in the middle of the data is accepted as well
        db.clear_unescaped();
        let partial = db.start_unescaping_with_copy(max_escaped_len, 2, 6);
        assert!(partial.is_ok());
        assert_eq!(db.unescaped_len, 4);
        assert_eq!(db.get_unescaped_slice().unwrap(), b"2345");
    }

    #[test]
    fn test_append_unescaped_byte_respects_escape_reserve() {
        let mut storage = [0u8; 100];
        let mut db = DirectBuffer::new(&mut storage);

        // 10% of 100, lifted to the 64-byte minimum
        assert_eq!(db.escape_reserve, 64);

        // Appending is allowed up to buffer length minus the reserve: 100 - 64 = 36
        let max_unescaped = 100 - db.escape_reserve;

        for i in 0..max_unescaped {
            let outcome = db.append_unescaped_byte(b'A');
            assert!(outcome.is_ok(), "Failed at byte {}", i);
        }

        assert_eq!(db.unescaped_len, max_unescaped);

        // The next byte would eat into the reserve and must be refused
        assert_eq!(
            db.append_unescaped_byte(b'B'),
            Err(DirectBufferError::BufferFull)
        );

        // The refused byte left the length untouched
        assert_eq!(db.unescaped_len, max_unescaped);
    }

    #[test]
    fn test_append_unescaped_byte_escape_reserve_larger_than_buffer() {
        let mut storage = [0u8; 10];
        let mut db = DirectBuffer::new(&mut storage);

        // The 64-byte minimum reserve exceeds the 10-byte buffer
        assert_eq!(db.escape_reserve, 64);

        // No byte is appendable; the call must report BufferFull rather than underflow
        assert_eq!(
            db.append_unescaped_byte(b'A'),
            Err(DirectBufferError::BufferFull)
        );

        // Same expectation for an even smaller buffer
        let mut tiny_storage = [0u8; 3];
        let mut tiny_db = DirectBuffer::new(&mut tiny_storage);
        assert_eq!(tiny_db.escape_reserve, 64);

        assert_eq!(
            tiny_db.append_unescaped_byte(b'B'),
            Err(DirectBufferError::BufferFull)
        );
    }
}
461

462
impl crate::number_parser::NumberExtractor for DirectBuffer<'_> {
463
    fn get_number_slice(
38✔
464
        &self,
38✔
465
        start: usize,
38✔
466
        end: usize,
38✔
467
    ) -> Result<&[u8], crate::shared::ParseError> {
38✔
468
        self.get_string_slice(start, end)
38✔
469
            .map_err(|_| crate::shared::ParseError::UnexpectedState("Invalid number slice bounds"))
38✔
470
    }
38✔
471

472
    fn current_position(&self) -> usize {
38✔
473
        self.tokenize_pos
38✔
474
    }
38✔
475

476
    fn is_empty(&self) -> bool {
21✔
477
        self.tokenize_pos >= self.data_end
21✔
478
    }
21✔
479
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc