• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

kaidokert / picojson-rs / 16708793874

03 Aug 2025 07:48PM UTC coverage: 93.313% (-0.7%) from 94.008%
16708793874

Pull #77

github

web-flow
Merge 6f7b76718 into 377ce19f7
Pull Request #77: Datasource intro

483 of 578 new or added lines in 9 files covered. (83.56%)

63 existing lines in 6 files now uncovered.

4996 of 5354 relevant lines covered (93.31%)

1311.46 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.75
/picojson/src/push_parser.rs
1
// SPDX-License-Identifier: Apache-2.0
2

3
//! A SAX-style JSON push parser.
4
//!
5
//! Implementation based on the handler_design pattern: events are delivered to a handler bound by higher-ranked trait bounds (HRTBs), so event lifetimes are chosen per call.
6

7
use crate::event_processor::{ContentExtractor, EscapeTiming, ParserCore};
8
use crate::push_content_builder::{PushContentExtractor, PushParserHandler};
9
use crate::shared::{DataSource, State};
10
use crate::stream_buffer::StreamBufferError;
11
use crate::{ujson, BitStackConfig, Event, ParseError};
12

13
#[cfg(any(test, debug_assertions))]
14
extern crate std;
15

16
/// A SAX-style JSON push parser.
17
///
18
/// Generic over BitStack storage type for configurable nesting depth. Parsing
19
/// events are returned to the handler.
20
///
21
/// # Generic Parameters
22
///
23
/// * `'scratch` - Lifetime for the scratch buffer used for temporary storage
24
/// * `H` - The event handler type that implements [`PushParserHandler`]
25
/// * `C` - BitStack configuration type that implements [`BitStackConfig`]
26
pub struct PushParser<'input, 'scratch, H, C>
27
where
28
    C: BitStackConfig,
29
{
30
    /// Content extractor that handles content extraction and event emission
31
    extractor: PushContentExtractor<'input, 'scratch>,
32
    /// The handler that receives events
33
    handler: H,
34
    /// Core parser logic shared with other parsers
35
    core: ParserCore<C::Bucket, C::Counter>,
36
}
37

38
impl<'input, 'scratch, H, C> PushParser<'input, 'scratch, H, C>
39
where
40
    C: BitStackConfig,
41
{
42
    /// Creates a new `PushParser`.
43
    pub fn new(handler: H, buffer: &'scratch mut [u8]) -> Self {
1,061✔
44
        Self {
1,061✔
45
            extractor: PushContentExtractor::new(buffer),
1,061✔
46
            handler,
1,061✔
47
            core: ParserCore::new_chunked(),
1,061✔
48
        }
1,061✔
49
    }
1,061✔
50

51
    /// Processes a chunk of input data.
52
    pub fn write<E>(&mut self, data: &'input [u8]) -> Result<(), PushParseError<E>>
6,518✔
53
    where
6,518✔
54
        H: for<'a, 'b> PushParserHandler<'a, 'b, E>,
6,518✔
55
        E: From<ParseError>,
6,518✔
56
    {
57
        // Apply any queued buffer resets
58
        self.extractor.apply_unescaped_reset_if_queued();
6,518✔
59

60
        // Set the input slice for the extractor to iterate over
61
        self.extractor.set_chunk(data);
6,518✔
62

63
        // Use ParserCore to process all bytes in the chunk
64
        let mut event_count = 0;
6,518✔
65
        loop {
66
            match self.core.next_event_impl_with_flags(
13,413✔
67
                &mut self.extractor,
13,413✔
68
                EscapeTiming::OnEnd, // PushParser uses OnEnd timing like StreamParser
13,413✔
69
                |extractor, byte| {
18,645✔
70
                    // Selective accumulation: let PushContentExtractor decide based on its state
71
                    // whether this byte should be accumulated or processed directly
72
                    extractor.handle_byte_accumulation(byte)
18,645✔
73
                },
18,645✔
74
                true, // always_accumulate_during_escapes: ensure all hex digits reach the accumulator
75
            ) {
76
                Ok(Event::EndDocument) => {
77
                    // EndDocument during write() means we've consumed all bytes in current chunk
NEW
78
                    break;
×
79
                }
80
                Ok(event) => {
6,895✔
81
                    event_count += 1;
6,895✔
82
                    // Handle all other events normally
83
                    self.handler
6,895✔
84
                        .handle_event(event)
6,895✔
85
                        .map_err(PushParseError::Handler)?;
6,895✔
86

87
                    // Apply any queued buffer resets after the event has been processed
88
                    // This ensures that buffer content from previous tokens doesn't leak into subsequent ones
89
                    self.extractor.apply_unescaped_reset_if_queued();
6,895✔
90
                }
91
                Err(ParseError::EndOfData) => {
92
                    // No more events available from current chunk
93
                    break;
6,418✔
94
                }
95
                Err(e) => {
100✔
96
                    return Err(PushParseError::Parse(e));
100✔
97
                }
98
            }
99
        }
100

101
        // Check for chunk boundary condition - if still processing a token when chunk ends
102
        let extractor_state = self.extractor.parser_state();
6,418✔
103

104
        if matches!(
2,626✔
105
            extractor_state,
6,418✔
106
            State::String(_) | State::Key(_) | State::Number(_)
107
        ) {
108
            // If we haven't already started using the scratch buffer (e.g., due to escapes)
109
            if !self.extractor.has_unescaped_content() {
3,792✔
110
                // Copy the partial content from this chunk to scratch buffer before it's lost
111
                self.extractor.copy_partial_content_to_scratch()?;
960✔
112
            } else {
113
                // Special case: For Numbers, check if the scratch buffer is actually empty
114
                // This handles the byte-by-byte case where the flag is stale from previous Key processing
115
                if matches!(extractor_state, State::Number(_)) {
2,832✔
116
                    let buffer_slice = self.extractor.get_unescaped_slice().unwrap_or(&[]);
715✔
117
                    let buffer_empty = buffer_slice.is_empty();
715✔
118

119
                    if buffer_empty {
715✔
NEW
120
                        self.extractor.copy_partial_content_to_scratch()?;
×
121
                    }
715✔
122
                }
2,117✔
123
            }
124
        }
2,626✔
125

126
        // Reset input slice
127
        self.extractor.reset_input();
6,409✔
128

129
        // Update position offset for next call
130
        self.extractor.add_position_offset(data.len());
6,409✔
131

132
        Ok(())
6,409✔
133
    }
6,518✔
134

135
    /// Finishes parsing, flushes any remaining events, and returns the handler.
136
    /// This method consumes the parser.
137
    pub fn finish<E>(mut self) -> Result<H, PushParseError<E>>
952✔
138
    where
952✔
139
        H: for<'a, 'b> PushParserHandler<'a, 'b, E>,
952✔
140
    {
141
        // Check that the JSON document is complete (all containers closed)
142
        // Use a no-op callback since we don't expect any more events
143
        let mut no_op_callback = |_event: ujson::Event, _pos: usize| {};
952✔
144
        let _bytes_processed = self.core.tokenizer.finish(&mut no_op_callback)?;
952✔
145

146
        // Handle any remaining content in the buffer
147
        if *self.extractor.parser_state() != State::None {
946✔
NEW
148
            return Err(crate::push_parser::PushParseError::Parse(
×
NEW
149
                ParseError::EndOfData,
×
NEW
150
            ));
×
151
        }
946✔
152

153
        // Emit EndDocument event
154
        self.handler
946✔
155
            .handle_event(Event::EndDocument)
946✔
156
            .map_err(PushParseError::Handler)?;
946✔
157

158
        Ok(self.handler)
946✔
159
    }
952✔
160
}
161

162
/// An error that can occur during push-based parsing.
163
#[derive(Debug, PartialEq)]
164
pub enum PushParseError<E> {
165
    /// An error occurred within the parser itself.
166
    Parse(ParseError),
167
    /// An error was returned by the user's handler.
168
    Handler(E),
169
}
170

171
impl<E> From<ujson::Error> for PushParseError<E> {
172
    fn from(e: ujson::Error) -> Self {
6✔
173
        PushParseError::Parse(e.into())
6✔
174
    }
6✔
175
}
176

177
impl<E> From<ParseError> for PushParseError<E> {
    /// Wraps the [`ParseError`] in [`PushParseError::Parse`].
    fn from(err: ParseError) -> Self {
        Self::Parse(err)
    }
}
182

183
impl<E> From<StreamBufferError> for PushParseError<E> {
    /// Converts the [`StreamBufferError`] into a [`ParseError`] and wraps it
    /// in [`PushParseError::Parse`].
    fn from(err: StreamBufferError) -> Self {
        Self::Parse(err.into())
    }
}
188

189
impl<E> From<core::str::Utf8Error> for PushParseError<E> {
NEW
190
    fn from(e: core::str::Utf8Error) -> Self {
×
NEW
191
        PushParseError::Parse(ParseError::InvalidUtf8(e))
×
NEW
192
    }
×
193
}
194

195
// Implement From<ParseError> for common error types used in tests
196
// This needs to be globally accessible for integration tests, not just unit tests
197
#[cfg(any(test, debug_assertions))]
198
impl From<ParseError> for std::string::String {
NEW
199
    fn from(_: ParseError) -> Self {
×
NEW
200
        std::string::String::new()
×
NEW
201
    }
×
202
}
203

204
#[cfg(any(test, debug_assertions))]
205
impl From<ParseError> for () {
UNCOV
206
    fn from(_: ParseError) -> Self {
×
UNCOV
207
        ()
×
UNCOV
208
    }
×
209
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc