• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

kaidokert / picojson-rs / 16083616169

05 Jul 2025 02:26AM UTC coverage: 94.358%. Remained the same
16083616169

push

github

web-flow
Escape reserve in direct_buffer (#29)

2191 of 2322 relevant lines covered (94.36%)

132.6 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.6
/picojson/src/direct_buffer.rs
1
// SPDX-License-Identifier: Apache-2.0
2

3
use crate::ParseError;
4

5
/// Error types for DirectBuffer operations
///
/// Every variant is a plain value (the only payload is a `&'static str`), so the
/// type is `Copy`: errors can be stored, compared and re-returned without moves.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DirectBufferError {
    /// Buffer is full and cannot accommodate more data
    BufferFull,
    /// Attempted to read beyond available data
    EndOfData,
    /// Invalid buffer state or operation; the payload describes what went wrong
    InvalidState(&'static str),
}
15

16
impl From<DirectBufferError> for ParseError {
17
    fn from(err: DirectBufferError) -> Self {
×
18
        match err {
×
19
            DirectBufferError::BufferFull => ParseError::ScratchBufferFull,
×
20
            DirectBufferError::EndOfData => ParseError::EndOfData,
×
21
            DirectBufferError::InvalidState(msg) => ParseError::UnexpectedState(msg),
×
22
        }
23
    }
×
24
}
25

26
/// DirectBuffer manages a single buffer for both input and escape processing
27
///
28
/// Key design principles:
29
/// - Reader fills unused portions of buffer directly
30
/// - Unescaped content is copied to buffer start when needed
31
/// - Zero-copy string extraction when no escapes are present
32
/// - Guaranteed space for escape processing (unescaped ≤ escaped)
33
pub struct DirectBuffer<'a> {
34
    /// The entire buffer slice
35
    buffer: &'a mut [u8],
36
    /// Current position where tokenizer is reading
37
    tokenize_pos: usize,
38
    /// End of valid data from Reader (buffer[0..data_end] contains valid data)
39
    data_end: usize,
40
    /// Length of unescaped content at buffer start (0 if no unescaping active)
41
    unescaped_len: usize,
42
    /// Minimum space to reserve for escape processing
43
    escape_reserve: usize,
44
}
45

46
impl<'a> DirectBuffer<'a> {
47
    /// Create a new DirectBuffer with the given buffer slice
48
    pub fn new(buffer: &'a mut [u8]) -> Self {
53✔
49
        // Reserve ~12.5% of buffer for escape processing (>>3 instead of /10), minimum 64 bytes
50
        // Avoids expensive 32-bit division on 8-bit AVR targets
51
        let escape_reserve = (buffer.len() >> 3).max(64);
53✔
52

53
        Self {
53✔
54
            buffer,
53✔
55
            tokenize_pos: 0,
53✔
56
            data_end: 0,
53✔
57
            unescaped_len: 0,
53✔
58
            escape_reserve,
53✔
59
        }
53✔
60
    }
53✔
61

62
    /// Get the current byte at tokenize position
63
    pub fn current_byte(&self) -> Result<u8, DirectBufferError> {
597✔
64
        if self.tokenize_pos >= self.data_end {
597✔
65
            return Err(DirectBufferError::EndOfData);
1✔
66
        }
596✔
67
        Ok(self.buffer[self.tokenize_pos])
596✔
68
    }
597✔
69

70
    /// Advance the tokenize position by one byte
71
    pub fn advance(&mut self) -> Result<(), DirectBufferError> {
601✔
72
        if self.tokenize_pos >= self.data_end {
601✔
73
            return Err(DirectBufferError::EndOfData);
1✔
74
        }
600✔
75
        self.tokenize_pos = self.tokenize_pos.wrapping_add(1);
600✔
76
        Ok(())
600✔
77
    }
601✔
78

79
    /// Get remaining bytes available for reading
80
    pub fn remaining_bytes(&self) -> usize {
10✔
81
        self.data_end.saturating_sub(self.tokenize_pos)
10✔
82
    }
10✔
83

84
    /// Get slice for Reader to fill with new data
85
    /// Returns None if no space available
86
    pub fn get_fill_slice(&mut self) -> Option<&mut [u8]> {
629✔
87
        if self.data_end >= self.buffer.len() {
629✔
88
            return None;
1✔
89
        }
628✔
90
        Some(&mut self.buffer[self.data_end..])
628✔
91
    }
629✔
92

93
    /// Mark that Reader filled `bytes_read` bytes
94
    pub fn mark_filled(&mut self, bytes_read: usize) -> Result<(), DirectBufferError> {
627✔
95
        let new_data_end = self.data_end.wrapping_add(bytes_read);
627✔
96
        if new_data_end > self.buffer.len() {
627✔
97
            return Err(DirectBufferError::InvalidState(
×
98
                "Attempted to mark more bytes than buffer space",
×
99
            ));
×
100
        }
627✔
101
        self.data_end = new_data_end;
627✔
102
        Ok(())
627✔
103
    }
627✔
104

105
    /// Start unescaping and copy existing content from a range in the buffer
106
    /// This handles the common case of starting escape processing partway through a string
107
    pub fn start_unescaping_with_copy(
8✔
108
        &mut self,
8✔
109
        max_escaped_len: usize,
8✔
110
        copy_start: usize,
8✔
111
        copy_end: usize,
8✔
112
    ) -> Result<(), DirectBufferError> {
8✔
113
        // Clear any previous unescaped content
114
        self.unescaped_len = 0;
8✔
115

116
        // Ensure we have space at the start for unescaping
117
        if max_escaped_len > self.buffer.len() {
8✔
118
            return Err(DirectBufferError::BufferFull);
×
119
        }
8✔
120

121
        // Copy existing content if there is any
122
        if copy_end > copy_start && copy_start < self.data_end {
8✔
123
            let span_len = copy_end.saturating_sub(copy_start);
8✔
124

125
            // Ensure the span fits in the buffer - return error instead of silent truncation
126
            if span_len > self.buffer.len() {
8✔
127
                return Err(DirectBufferError::BufferFull);
1✔
128
            }
7✔
129

130
            // Copy within the same buffer: move data from [copy_start..copy_end] to [0..span_len]
131
            // Use copy_within to handle overlapping ranges safely
132
            self.buffer
7✔
133
                .copy_within(copy_start..copy_start.wrapping_add(span_len), 0);
7✔
134
            self.unescaped_len = span_len;
7✔
135
        }
×
136

137
        Ok(())
7✔
138
    }
8✔
139

140
    /// Get the unescaped content slice
141
    pub fn get_unescaped_slice(&self) -> Result<&[u8], DirectBufferError> {
8✔
142
        if self.unescaped_len == 0 {
8✔
143
            return Err(DirectBufferError::InvalidState(
1✔
144
                "No unescaped content available",
1✔
145
            ));
1✔
146
        }
7✔
147
        Ok(&self.buffer[0..self.unescaped_len])
7✔
148
    }
8✔
149

150
    /// Clear unescaped content (call after yielding unescaped string)
151
    pub fn clear_unescaped(&mut self) {
3✔
152
        self.unescaped_len = 0;
3✔
153
    }
3✔
154

155
    /// Get current tokenize position (for string start tracking)
156
    pub fn current_position(&self) -> usize {
106✔
157
        self.tokenize_pos
106✔
158
    }
106✔
159

160
    /// Check if buffer is empty (no more data to process)
161
    pub fn is_empty(&self) -> bool {
625✔
162
        self.tokenize_pos >= self.data_end
625✔
163
    }
625✔
164

165
    /// Check if we have unescaped content ready
166
    pub fn has_unescaped_content(&self) -> bool {
212✔
167
        self.unescaped_len > 0
212✔
168
    }
212✔
169

170
    /// Append a single byte to the unescaped content
171
    pub fn append_unescaped_byte(&mut self, byte: u8) -> Result<(), DirectBufferError> {
83✔
172
        let available_space = self.buffer.len().saturating_sub(self.escape_reserve);
83✔
173
        if self.unescaped_len >= available_space {
83✔
174
            return Err(DirectBufferError::BufferFull);
3✔
175
        }
80✔
176

177
        self.buffer[self.unescaped_len] = byte;
80✔
178
        self.unescaped_len = self.unescaped_len.wrapping_add(1);
80✔
179
        Ok(())
80✔
180
    }
83✔
181

182
    /// Get a string slice from the buffer (zero-copy)
183
    /// Used for strings without escapes
184
    pub fn get_string_slice(&self, start: usize, end: usize) -> Result<&[u8], DirectBufferError> {
68✔
185
        if start > end || end > self.data_end {
68✔
186
            return Err(DirectBufferError::InvalidState("Invalid slice bounds"));
×
187
        }
68✔
188
        Ok(&self.buffer[start..end])
68✔
189
    }
68✔
190
}
191

192
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lifetime_expectations() {
        // This test demonstrates how DirectBuffer lifetimes should work
        let mut buffer = [0u8; 100];
        let mut direct_buffer = DirectBuffer::new(&mut buffer);

        // Simulate some data being in the buffer
        let test_data = b"hello world";
        direct_buffer.buffer[0..test_data.len()].copy_from_slice(test_data);
        direct_buffer.data_end = test_data.len();

        // Test unescaped content - add some unescaped data
        direct_buffer.unescaped_len = 3;
        direct_buffer.buffer[0..3].copy_from_slice(b"abc");

        let unescaped_slice = direct_buffer.get_unescaped_slice().unwrap();
        assert_eq!(unescaped_slice, b"abc");

        // The key expectation: these slices should live as long as the original buffer
        // and be usable to create String::Borrowed(&'buffer str) and String::Unescaped(&'buffer str)
    }

    #[test]
    fn test_new_direct_buffer() {
        let mut buffer = [0u8; 100];
        let db = DirectBuffer::new(&mut buffer);

        assert_eq!(db.tokenize_pos, 0);
        assert_eq!(db.data_end, 0);
        assert_eq!(db.unescaped_len, 0);
        assert_eq!(db.escape_reserve, 64); // 100 >> 3 = 12, raised to the 64-byte minimum
        assert!(db.is_empty());
    }

    #[test]
    fn test_fill_and_advance() {
        let mut buffer = [0u8; 100];
        let mut db = DirectBuffer::new(&mut buffer);

        // Fill with some data
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice[0..5].copy_from_slice(b"hello");
        }
        db.mark_filled(5).unwrap();

        assert_eq!(db.data_end, 5);
        assert_eq!(db.remaining_bytes(), 5);

        // Read bytes
        assert_eq!(db.current_byte().unwrap(), b'h');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'e');
        assert_eq!(db.remaining_bytes(), 4);
    }

    #[test]
    fn test_error_conditions() {
        let mut buffer = [0u8; 10];
        let mut db = DirectBuffer::new(&mut buffer);

        // EndOfData errors
        assert_eq!(db.current_byte().unwrap_err(), DirectBufferError::EndOfData);
        assert_eq!(db.advance().unwrap_err(), DirectBufferError::EndOfData);

        // No unescaped content
        assert!(db.get_unescaped_slice().is_err());
    }

    #[test]
    fn test_buffer_full_scenario() {
        // Test what happens when buffer gets completely full
        let mut buffer = [0u8; 10];
        let mut db = DirectBuffer::new(&mut buffer);

        // Fill buffer completely
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice.copy_from_slice(b"0123456789");
        }
        db.mark_filled(10).unwrap();

        // No more space for filling
        assert!(db.get_fill_slice().is_none());

        // We can still read from buffer
        assert_eq!(db.current_byte().unwrap(), b'0');
        assert_eq!(db.remaining_bytes(), 10);
    }

    #[test]
    fn test_minimal_buffer_with_long_token() {
        // Test very small buffer with a token that doesn't fit
        let mut buffer = [0u8; 8]; // Very small buffer
        let mut db = DirectBuffer::new(&mut buffer);

        // Try to put a string that's almost as big as the buffer
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice[0..6].copy_from_slice(b"\"hello"); // Start of a long string, no closing quote
        }
        db.mark_filled(6).unwrap();

        // Advance through the data
        for _ in 0..6 {
            db.advance().unwrap();
        }

        // Now buffer is exhausted but we don't have a complete token
        assert!(db.is_empty());
        assert_eq!(db.remaining_bytes(), 0);

        // This simulates the scenario where we need more data but can't fit it
        // The parser would need to handle this by buffering the incomplete token
    }

    #[test]
    fn test_reader_returns_zero_bytes() {
        let mut buffer = [0u8; 20];
        let mut db = DirectBuffer::new(&mut buffer);

        // Simulate Reader returning 0 bytes (EOF)
        {
            let fill_slice = db.get_fill_slice().unwrap();
            assert_eq!(fill_slice.len(), 20);
            // Reader returns 0 bytes - simulating EOF or no data available
        }
        db.mark_filled(0).unwrap(); // Reader returned 0

        assert!(db.is_empty());
        assert_eq!(db.data_end, 0);
        assert_eq!(db.remaining_bytes(), 0);

        // Should still be able to get fill slice for next attempt
        let fill_slice = db.get_fill_slice().unwrap();
        assert_eq!(fill_slice.len(), 20);
    }

    #[test]
    fn test_maximum_escape_reserve_scenario() {
        let mut buffer = [0u8; 100];
        let db = DirectBuffer::new(&mut buffer);

        // Check escape reserve calculation
        assert_eq!(db.escape_reserve, 64); // max(100 >> 3, 64) = max(12, 64) = 64

        // Test with smaller buffer
        let mut small_buffer = [0u8; 50];
        let small_db = DirectBuffer::new(&mut small_buffer);
        assert_eq!(small_db.escape_reserve, 64); // Still 64 (minimum)

        // Test with larger buffer
        let mut large_buffer = [0u8; 1000];
        let large_db = DirectBuffer::new(&mut large_buffer);
        assert_eq!(large_db.escape_reserve, 125); // 1000 >> 3 = 125
    }

    #[test]
    fn test_boundary_conditions() {
        let mut buffer = [0u8; 3]; // Absolute minimum
        let mut db = DirectBuffer::new(&mut buffer);

        // Can't even hold a proper JSON token, but should not crash
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice.copy_from_slice(b"\"a\"");
        }
        db.mark_filled(3).unwrap();

        // Should be able to read through it
        assert_eq!(db.current_byte().unwrap(), b'"');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'a');
        db.advance().unwrap();
        assert_eq!(db.current_byte().unwrap(), b'"');
        db.advance().unwrap();

        assert!(db.is_empty());
    }

    #[test]
    fn test_start_unescaping_with_copy_span_too_large() {
        let mut buffer = [0u8; 10]; // Small buffer
        let mut db = DirectBuffer::new(&mut buffer);

        // Fill buffer with some data
        {
            let fill_slice = db.get_fill_slice().unwrap();
            fill_slice.copy_from_slice(b"0123456789");
        }
        db.mark_filled(10).unwrap();

        // Try to copy a span that's larger than the entire buffer
        let copy_start = 0;
        let copy_end = 15; // This span (15 bytes) is larger than buffer (10 bytes)
        let max_escaped_len = 5; // This is fine

        // Should return BufferFull error instead of silently truncating
        let result = db.start_unescaping_with_copy(max_escaped_len, copy_start, copy_end);
        assert_eq!(result.unwrap_err(), DirectBufferError::BufferFull);

        // Test boundary case: span exactly equals buffer size should work
        let copy_end_exact = 10; // Span of exactly 10 bytes (buffer size)
        let result = db.start_unescaping_with_copy(max_escaped_len, 0, copy_end_exact);
        assert!(result.is_ok());
        assert_eq!(db.unescaped_len, 10);

        // Test valid smaller span should work
        db.clear_unescaped();
        let result = db.start_unescaping_with_copy(max_escaped_len, 2, 6); // 4 byte span
        assert!(result.is_ok());
        assert_eq!(db.unescaped_len, 4);
        assert_eq!(db.get_unescaped_slice().unwrap(), b"2345");
    }

    #[test]
    fn test_append_unescaped_byte_respects_escape_reserve() {
        let mut buffer = [0u8; 100]; // 100 byte buffer
        let mut db = DirectBuffer::new(&mut buffer);

        // Check escape reserve was set correctly (100 >> 3 = 12, raised to the 64-byte minimum)
        assert_eq!(db.escape_reserve, 64);

        // Should be able to append up to (buffer_len - escape_reserve) bytes
        let max_unescaped = 100 - db.escape_reserve; // 100 - 64 = 36

        // Fill up to the limit - should succeed
        for i in 0..max_unescaped {
            let result = db.append_unescaped_byte(b'A');
            assert!(result.is_ok(), "Failed at byte {}", i);
        }

        assert_eq!(db.unescaped_len, max_unescaped);

        // One more byte should fail due to escape reserve constraint
        let result = db.append_unescaped_byte(b'B');
        assert_eq!(result.unwrap_err(), DirectBufferError::BufferFull);

        // Verify we didn't exceed the escape reserve boundary
        assert_eq!(db.unescaped_len, max_unescaped);
    }

    #[test]
    fn test_append_unescaped_byte_escape_reserve_larger_than_buffer() {
        let mut buffer = [0u8; 10]; // Very small buffer
        let mut db = DirectBuffer::new(&mut buffer);

        // Even small buffers get minimum 64 byte escape reserve, but that's larger than buffer
        assert_eq!(db.escape_reserve, 64); // minimum

        // Since escape_reserve (64) > buffer.len() (10), no bytes should be appendable
        // This should not panic with underflow, but return BufferFull error
        let result = db.append_unescaped_byte(b'A');
        assert_eq!(result.unwrap_err(), DirectBufferError::BufferFull);

        // Test with even smaller buffer to ensure we handle underflow correctly
        let mut tiny_buffer = [0u8; 3];
        let mut tiny_db = DirectBuffer::new(&mut tiny_buffer);
        assert_eq!(tiny_db.escape_reserve, 64); // Still minimum 64

        // Should handle this gracefully without panic
        let result = tiny_db.append_unescaped_byte(b'B');
        assert_eq!(result.unwrap_err(), DirectBufferError::BufferFull);
    }
}
463

464
impl crate::number_parser::NumberExtractor for DirectBuffer<'_> {
465
    fn get_number_slice(
38✔
466
        &self,
38✔
467
        start: usize,
38✔
468
        end: usize,
38✔
469
    ) -> Result<&[u8], crate::shared::ParseError> {
38✔
470
        self.get_string_slice(start, end)
38✔
471
            .map_err(|_| crate::shared::ParseError::UnexpectedState("Invalid number slice bounds"))
38✔
472
    }
38✔
473

474
    fn current_position(&self) -> usize {
38✔
475
        self.tokenize_pos
38✔
476
    }
38✔
477

478
    fn is_empty(&self) -> bool {
21✔
479
        self.tokenize_pos >= self.data_end
21✔
480
    }
21✔
481
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc