• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

agronholm / cbor2 / 27462848205

13 Jun 2026 09:25AM UTC coverage: 94.906% (+0.02%) from 94.889%
27462848205

Pull #316

github

web-flow
Merge 93c857485 into d4b166c6c
Pull Request #316: avoid quadratic concatenation when decoding chunked strings

53 of 54 new or added lines in 1 file covered. (98.15%)

54 existing lines in 1 file now uncovered.

2366 of 2493 relevant lines covered (94.91%)

206151.68 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.13
/rust/decoder.rs
1
use crate::_cbor2::{BREAK_MARKER, SYS_MAXSIZE, UNDEFINED};
2
use crate::decoder::DecoderResult::{
3
    BeginFrame, CompleteFrame, ContinueFrame, Shareable, SharedReference, StringNamespace,
4
    StringReference, StringValue, Value,
5
};
6
#[cfg(not(Py_3_15))]
7
use crate::types::FrozenDict;
8
use crate::types::{
9
    CBORDecodeEOF, CBORDecodeError, CBORSimpleValue, CBORTag, DECIMAL_TYPE, FRACTION_TYPE,
10
    IPV4ADDRESS_TYPE, IPV4INTERFACE_TYPE, IPV4NETWORK_TYPE, IPV6ADDRESS_TYPE, IPV6INTERFACE_TYPE,
11
    IPV6NETWORK_TYPE, UUID_TYPE,
12
};
13
use crate::utils::{PyImportable, create_exc_from, raise_exc_from};
14
use half::f16;
15
use pyo3::exceptions::{PyException, PyLookupError, PyTypeError, PyValueError};
16
use pyo3::prelude::*;
17
use pyo3::sync::PyOnceLock;
18
use pyo3::types::{
19
    PyBytes, PyCFunction, PyComplex, PyDict, PyFrozenSet, PyInt, PyList, PyListMethods, PyMapping,
20
    PySet, PyString, PyTuple,
21
};
22
use pyo3::{IntoPyObjectExt, Py, PyAny, PyErrArguments, intern, pyclass};
23
use std::fmt::{Display, Formatter};
24
use std::mem::{replace, take};
25

26
// Names of the attributes that `shareable_decoder` sets on the decorated callable.
const IMMUTABLE_ATTR: &str = "_cbor2_immutable";
const NAME_ATTR: &str = "_cbor2_name";
// NOTE(review): presumably the `whence` argument for a relative seek (Python's
// `os.SEEK_CUR` is also 1); its use is outside this view — confirm.
const SEEK_CUR: u8 = 1;

// Python objects referenced by module name and dotted attribute path.
// NOTE(review): `PyImportable` comes from `crate::utils`; presumably it imports and caches
// the object on first use — confirm against its definition.
static DATE_FROMISOFORMAT: PyImportable = PyImportable::new("datetime", "date.fromisoformat");
static DATE_FROMORDINAL: PyImportable = PyImportable::new("datetime", "date.fromordinal");
static DATETIME_FROMISOFORMAT: PyImportable =
    PyImportable::new("datetime", "datetime.fromisoformat");
static DATETIME_FROMTIMESTAMP: PyImportable =
    PyImportable::new("datetime", "datetime.fromtimestamp");
static EMAIL_PARSER: PyImportable = PyImportable::new("email.parser", "Parser");
// Caches the class obtained from `codecs.lookup("utf-8").incrementaldecoder`; it is filled in
// by `decode_string` the first time a text string longer than 65536 bytes is decoded.
static INCREMENTAL_UTF8_DECODER: PyOnceLock<Py<PyAny>> = PyOnceLock::new();
static INT_FROMBYTES: PyImportable = PyImportable::new("builtins", "int.from_bytes");
static IPADDRESS_FUNC: PyImportable = PyImportable::new("ipaddress", "ip_address");
static IPNETWORK_FUNC: PyImportable = PyImportable::new("ipaddress", "ip_network");
static IPINTERFACE_FUNC: PyImportable = PyImportable::new("ipaddress", "ip_interface");
static RE_COMPILE: PyImportable = PyImportable::new("re", "compile");
static UTC: PyImportable = PyImportable::new("datetime", "timezone.utc");
// Only compiled for Python 3.15+, where the `frozendict` builtin is looked up instead of
// using the crate's own `FrozenDict` type.
#[cfg(Py_3_15)]
static FROZEN_DICT: PyImportable = PyImportable::new("builtins", "frozendict");
46

47
/// Outcome of decoding one item, or of feeding one item to a container callback.
enum DecoderResult<'a> {
    /// Starts a container whose child items are fed to the boxed callback one at a time.
    ///
    /// The `Option` carries the container object when it already exists (the mutable list
    /// in `decode_array`) and `None` when it is only built on completion (tuples). The
    /// `DisplayName` names the item being decoded.
    /// NOTE(review): the `bool` is presumably the "decode the next child as immutable"
    /// flag — its consumer is outside this view; confirm.
    BeginFrame(
        Box<DecoderCallback<'a>>,
        bool,
        Option<Bound<'a, PyAny>>,
        DisplayName<'a>,
    ),
    /// Returned by a container callback that expects more items.
    /// NOTE(review): the `bool` presumably has the same meaning as in `BeginFrame` — confirm.
    ContinueFrame(bool),
    /// Returned by a container callback once the container is finished; carries the result.
    CompleteFrame(Bound<'a, PyAny>),
    /// A fully decoded, self-contained value.
    Value(Bound<'a, PyAny>),
    /// A decoded definite-length byte or text string together with its encoded length in
    /// bytes.
    /// NOTE(review): the length is presumably used for string-reference bookkeeping — confirm.
    StringValue(Bound<'a, PyAny>, usize),
    // NOTE(review): the remaining variants are produced and consumed outside this view;
    // presumably they correspond to the string-reference namespace and shared-value
    // extensions — confirm before relying on this description.
    StringNamespace,
    StringReference(usize),
    Shareable,
    SharedReference(usize),
}
63

64
/// Human-readable name of the item being decoded; rendered through its `Display` impl.
enum DisplayName<'a> {
    /// A fixed name such as `"array"`, written verbatim.
    String(&'static str),
    /// A semantic tag number, rendered as `semantic tag <number>`.
    SemanticTag(u64),
    /// An arbitrary Python object, rendered with the object's own `Display` output.
    PythonName(Bound<'a, PyAny>),
}
69

70
impl<'a> Display for DisplayName<'a> {
71
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
540✔
72
        match self {
540✔
73
            DisplayName::String(s) => f.write_str(s),
516✔
74
            DisplayName::SemanticTag(tagnum) => write!(f, "semantic tag {}", tagnum),
12✔
75
            DisplayName::PythonName(obj) => write!(f, "{}", obj),
12✔
76
        }
77
    }
540✔
78
}
79

80
/// Callback attached to an open container frame: it receives a decoded child item plus a
/// flag and reports, through the returned `DecoderResult`, whether the container is complete.
/// NOTE(review): the `bool` argument is ignored (`_immutable`) by every callback visible
/// here; presumably it tells whether the item was decoded as immutable — confirm.
type DecoderCallback<'py> =
    dyn 'py + FnMut(Bound<'py, PyAny>, bool) -> PyResult<DecoderResult<'py>>;
82

83
/// One entry of the decoder's frame stack.
// NOTE(review): the code that pushes and pops frames is outside this view, so the field
// notes below are assumptions based on names and on `DecoderResult::BeginFrame` — confirm.
struct StackFrame<'py> {
    // Presumably: whether child items of this frame must be decoded as immutable values.
    immutable: bool,
    // Presumably: the callback taken from `BeginFrame`; `None` when no callback is attached.
    decoder_callback: Option<Box<DecoderCallback<'py>>>,
    // Presumably: slot in the shareables table to fill once this frame's value is complete.
    shareable_index: Option<usize>,
    // Name used when reporting an error raised while decoding this frame.
    typename: DisplayName<'py>,
    // Presumably: set when this frame opened a string-reference namespace.
    contains_string_namespace: bool,
}
90

91
/// Decorates a function to be a two-stage decoder.
92
///
93
/// :param name: the name displayed in a :exc:`CBORDecodeError` raised by the decoder
94
///     (e.g. "error decoding thingamajig") where name='thingamajig`)
95
/// :param immutable: :data:`True` if the item sent to the decoder should be decoded as immutable
96
#[pyfunction]
97
#[pyo3(signature = (func=None, /, *, name=None, immutable=false))]
98
pub fn shareable_decoder<'py>(
120✔
99
    py: Python<'py>,
120✔
100
    func: Option<Py<PyAny>>,
120✔
101
    name: Option<Py<PyString>>,
120✔
102
    immutable: bool,
120✔
103
) -> PyResult<Bound<'py, PyAny>> {
120✔
104
    match func {
120✔
105
        None => PyCFunction::new_closure(
60✔
106
            py,
60✔
107
            None,
60✔
108
            None,
60✔
109
            move |args: &Bound<'_, PyTuple>,
110
                  _kwargs: Option<&Bound<'_, PyDict>>|
111
                  -> PyResult<Py<PyAny>> {
60✔
112
                let py = args.py();
60✔
113
                let func = args.get_item(0)?;
60✔
114
                let name = name.as_ref().map(|x| x.clone_ref(py));
60✔
115
                shareable_decoder(py, Some(func.unbind()), name, immutable).map(Bound::unbind)
60✔
116
            },
60✔
117
        )
118
        .map(|f| f.into_any()),
60✔
119
        Some(func) => {
60✔
120
            let bound_func = func.bind(py);
60✔
121
            if !bound_func.is_callable() {
60✔
UNCOV
122
                return Err(PyTypeError::new_err(format!("{func} is not callable")));
×
123
            }
60✔
124
            bound_func.setattr(intern!(py, NAME_ATTR), name)?;
60✔
125
            bound_func.setattr(intern!(py, IMMUTABLE_ATTR), immutable)?;
60✔
126
            Ok(bound_func.clone().into_any())
60✔
127
        }
128
    }
129
}
120✔
130

131
fn require_tuple<'py>(value: Bound<'py, PyAny>, length: usize) -> PyResult<Bound<'py, PyTuple>> {
488✔
132
    let array: Bound<'py, PyTuple> = value
488✔
133
        .cast_into()
488✔
134
        .map_err(|_| PyTypeError::new_err("input value must be an array"))?;
488✔
135
    if array.len() != length {
452✔
136
        return Err(PyValueError::new_err(format!(
×
UNCOV
137
            "expected an array with exactly {length} elements"
×
UNCOV
138
        )));
×
139
    }
452✔
140
    Ok(array)
452✔
141
}
488✔
142

143
/// The CBORDecoder class implements a fully featured `CBOR`_ decoder with
144
/// several extensions for handling shared references, big integers, rational
145
/// numbers and so on. Typically, the class is not used directly, but the
146
/// :func:`load` and :func:`loads` functions are called to indirectly construct
147
/// and use the class.
148
///
149
/// When the class is constructed manually, the main entry point is :meth:`decode`.
150
///
151
/// :param fp: the file to read from (any file-like object opened for reading in binary mode)
152
/// :param tag_hook:
153
///     callable that takes 2 arguments: the decoder instance, and the :class:`.CBORTag`
154
///     to be decoded. This callback is invoked for any tags for which there is no
155
///     built-in decoder. The return value is substituted for the :class:`.CBORTag`
156
///     object in the deserialized output
157
/// :param object_hook:
158
///     callable that takes 2 arguments: the decoder instance, and a dictionary. This
159
///     callback is invoked for each deserialized :class:`dict` object. The return value
160
///     is substituted for the dict in the deserialized output.
161
/// :param semantic_decoders:
162
///     An optional mapping for overriding the decoding for select semantic tags.
163
///     The value is a mapping of semantic tags (integers) to callables that take
164
///     the decoder instance as the sole argument.
165
/// :param str_errors:
166
///     determines how to handle Unicode decoding errors (see the `Error Handlers`_
167
///     section in the standard library documentation for details)
168
/// :param read_size: minimum number of bytes to read at once
169
///     (ignored if ``fp`` is not seekable)
170
/// :param max_depth:
171
///     maximum allowed depth for nested containers
172
/// :param allow_indefinite:
173
///     if :data:`False`, raise a :exc:`CBORDecodeError` when encountering an indefinite-length
174
///     string or container in the input stream
175
/// :param allow_duplicate_keys:
176
///     if :data:`False`, raise a :exc:`CBORDecodeError` when a map key that has already been
177
///     decoded in the same map is encountered
178
///
179
/// .. _CBOR: https://cbor.io/
180
#[pyclass(module = "cbor2")]
pub struct CBORDecoder {
    // Configuration. `new_internal` starts the four `Option` fields it lists as `None` and
    // then fills them through the `set_*` methods; `semantic_decoders` is stored directly.
    fp: Option<Py<PyAny>>,
    tag_hook: Option<Py<PyAny>>,
    object_hook: Option<Py<PyAny>>,
    semantic_decoders: Option<Py<PyMapping>>,
    // `None` makes `decode_string` use strict `PyString::from_bytes` decoding; otherwise the
    // value is passed to Python's decoding machinery as the error handler name.
    str_errors: Option<Py<PyString>>,
    // Read-only from Python (`#[pyo3(get)]`).
    #[pyo3(get)]
    read_size: usize,
    #[pyo3(get)]
    max_depth: usize,
    #[pyo3(get)]
    allow_indefinite: bool,
    #[pyo3(get)]
    allow_duplicate_keys: bool,

    // Input buffering state.
    // Called by `read_from_fp` to fetch more bytes; `None` means there is nothing to read from.
    read_method: Option<Py<PyAny>>,
    // Bytes object currently being consumed.
    buffer: Option<Py<PyBytes>>,
    // Offset of the next unread byte within `buffer`.
    read_position: usize,
    // Number of unread bytes remaining in `buffer`.
    available_bytes: usize,
    // When false, `read_from_fp` ignores `read_size` and requests only what is needed.
    fp_is_seekable: bool,
}
202

203
impl CBORDecoder {
204
    pub fn new_internal(
4,680✔
205
        py: Python<'_>,
4,680✔
206
        fp: Option<&Bound<'_, PyAny>>,
4,680✔
207
        buffer: Option<Bound<PyBytes>>,
4,680✔
208
        tag_hook: Option<&Bound<'_, PyAny>>,
4,680✔
209
        object_hook: Option<&Bound<'_, PyAny>>,
4,680✔
210
        semantic_decoders: Option<&Bound<'_, PyMapping>>,
4,680✔
211
        str_errors: &str,
4,680✔
212
        read_size: usize,
4,680✔
213
        max_depth: usize,
4,680✔
214
        allow_indefinite: bool,
4,680✔
215
        allow_duplicate_keys: bool,
4,680✔
216
    ) -> PyResult<Self> {
4,680✔
217
        let available_bytes = if let Some(buffer) = buffer.as_ref() {
4,680✔
218
            buffer.len()?
4,236✔
219
        } else {
220
            0
444✔
221
        };
222
        let bound_str_errors = PyString::new(py, str_errors);
4,680✔
223
        let mut this = Self {
4,680✔
224
            fp: None,
4,680✔
225
            tag_hook: None,
4,680✔
226
            object_hook: None,
4,680✔
227
            str_errors: None,
4,680✔
228
            read_size,
4,680✔
229
            max_depth,
4,680✔
230
            allow_indefinite,
4,680✔
231
            allow_duplicate_keys,
4,680✔
232
            semantic_decoders: semantic_decoders.map(|d| d.clone().unbind()),
4,680✔
233
            read_method: None,
4,680✔
234
            buffer: buffer.map(Bound::unbind),
4,680✔
235
            read_position: 0,
236
            available_bytes,
4,680✔
237
            fp_is_seekable: false,
238
        };
239
        if let Some(fp) = fp {
4,680✔
240
            this.set_fp(fp)?
444✔
241
        };
4,236✔
242
        this.set_tag_hook(tag_hook)?;
4,656✔
243
        this.set_object_hook(object_hook)?;
4,644✔
244
        this.set_str_errors(&bound_str_errors)?;
4,632✔
245
        Ok(this)
4,620✔
246
    }
4,680✔
247

248
    fn read_from_fp<'py>(
468✔
249
        &mut self,
468✔
250
        py: Python<'py>,
468✔
251
        minimum_amount: usize,
468✔
252
    ) -> PyResult<(Bound<'py, PyBytes>, usize)> {
468✔
253
        let read_size: usize = if self.fp_is_seekable {
468✔
254
            self.read_size
336✔
255
        } else {
256
            1
132✔
257
        };
258
        let bytes_to_read = minimum_amount.max(read_size);
468✔
259
        let num_read_bytes = if let Some(read) = self.read_method.as_ref() {
468✔
260
            let bytes_from_fp: Bound<PyBytes> =
360✔
261
                read.bind(py).call1((&bytes_to_read,))?.cast_into()?;
360✔
262
            let num_read_bytes = bytes_from_fp.len()?;
360✔
263
            if num_read_bytes >= minimum_amount {
360✔
264
                return Ok((bytes_from_fp, num_read_bytes));
312✔
265
            }
48✔
266
            num_read_bytes
48✔
267
        } else {
268
            0
108✔
269
        };
270
        Err(CBORDecodeEOF::new_err(format!(
156✔
271
            "premature end of stream (expected to read at least {minimum_amount} \
156✔
272
                 bytes, got {num_read_bytes} instead)"
156✔
273
        )))
156✔
274
    }
468✔
275

276
    /// Reads exactly `N` bytes from the input and returns them as a fixed-size array.
    ///
    /// Bytes are served from the current buffer when it holds enough of them. Otherwise
    /// more data is fetched with `read_from_fp`, whose error (for example `CBORDecodeEOF`)
    /// is propagated. The buffer bookkeeping is only updated after a refill has succeeded,
    /// so a failed read leaves the decoder's buffer state untouched and consistent.
    fn read_exact<const N: usize>(&mut self, py: Python<'_>) -> PyResult<[u8; N]> {
        if self.available_bytes == 0 {
            // Nothing buffered: fetch a fresh buffer and serve the request from its start.
            let (new_bytes, amount_read) = self.read_from_fp(py, N)?;
            let head: [u8; N] = new_bytes.as_bytes()[..N].try_into()?;
            self.read_position = N;
            self.available_bytes = amount_read - N;
            self.buffer = Some(new_bytes.unbind());
            Ok(head)
        } else if self.available_bytes < N {
            // The request straddles two buffers: join the unread tail of the current buffer
            // with the head of a newly read one.
            let needed_bytes = N - self.available_bytes;
            let mut combined: Vec<u8> = Vec::with_capacity(N);
            {
                // Copy only the unread tail, and leave the buffer in place so that a failed
                // refill below cannot leave `buffer` empty while `available_bytes` is
                // still non-zero.
                let buffer = self
                    .buffer
                    .as_ref()
                    .expect("unread bytes are accounted for but no buffer is set");
                let start = self.read_position;
                combined
                    .extend_from_slice(&buffer.as_bytes(py)[start..start + self.available_bytes]);
            }
            let (new_bytes, amount_read) = self.read_from_fp(py, needed_bytes)?;
            combined.extend_from_slice(&new_bytes.as_bytes()[..needed_bytes]);
            self.buffer = Some(new_bytes.unbind());
            self.available_bytes = amount_read - needed_bytes;
            self.read_position = needed_bytes;
            Ok(combined.try_into().expect("buffer size mismatch"))
        } else {
            // Fast path: copy the bytes straight out of the existing buffer.
            let slice: [u8; N] = self.buffer.as_ref().unwrap().bind(py).as_bytes()
                [self.read_position..self.read_position + N]
                .try_into()?;
            self.available_bytes -= N;
            self.read_position += N;
            Ok(slice)
        }
    }
10,425,000✔
310

311
    /// Reads the initial byte of the next item and splits it into `(major type, subtype)`:
    /// the upper three bits and the lower five bits respectively.
    fn read_major_and_subtype(&mut self, py: Python<'_>) -> PyResult<(u8, u8)> {
        let [initial_byte] = self.read_exact::<1>(py)?;
        Ok((initial_byte >> 5, initial_byte & 0x1f))
    }
10,414,316✔
317

318
    fn decode_length_finite(&mut self, py: Python<'_>, subtype: u8) -> PyResult<u64> {
9,606,936✔
319
        match self.decode_length(py, subtype)? {
9,606,936✔
320
            Some(length) => Ok(length),
9,606,900✔
321
            None => Err(CBORDecodeError::new_err(
24✔
322
                "indefinite length not allowed here",
24✔
323
            )),
24✔
324
        }
325
    }
9,606,936✔
326

327
    /// Like [`decode_length`], but converts `Some(u64)` to `Some(usize)`, returning
328
    /// a [`CBORDecodeError`] if the value exceeds the platform's address space.
329
    fn decode_length_as_usize(&mut self, py: Python<'_>, subtype: u8) -> PyResult<Option<usize>> {
799,264✔
330
        match self.decode_length(py, subtype)? {
799,264✔
331
            Some(length) => usize::try_from(length).map(Some).map_err(|_| {
798,868✔
UNCOV
332
                CBORDecodeError::new_err(format!(
×
333
                    "huge item length {length} exceeds the system address space"
334
                ))
UNCOV
335
            }),
×
336
            None => Ok(None),
384✔
337
        }
338
    }
799,264✔
339

340
    //
341
    // Decoders for major types (0-7)
342
    //
343

344
    /// Decode the length of the next item.
345
    ///
346
    /// This is a low-level operation that may be needed by custom decoder callbacks.
347
    ///
348
    /// :param subtype:
349
    /// :return: the length of the item, or :data:`None` to indicate an indefinite-length item
350
    fn decode_length(&mut self, py: Python<'_>, subtype: u8) -> PyResult<Option<u64>> {
10,406,200✔
351
        let length = match subtype {
10,406,200✔
352
            ..24 => Some(subtype as u64),
10,406,200✔
353
            24 => Some(self.read_exact::<1>(py)?[0] as u64),
2,148✔
354
            25 => Some(u16::from_be_bytes(self.read_exact(py)?) as u64),
1,132✔
355
            26 => Some(u32::from_be_bytes(self.read_exact(py)?) as u64),
312✔
356
            27 => Some(u64::from_be_bytes(self.read_exact(py)?)),
296✔
357
            31 => {
358
                if !self.allow_indefinite {
420✔
359
                    return Err(CBORDecodeError::new_err(
12✔
360
                        "encountered indefinite length but it has been disabled",
12✔
361
                    ));
12✔
362
                }
408✔
363
                None
408✔
364
            }
365
            _ => {
366
                return Err(CBORDecodeError::new_err(format!(
12✔
367
                    "unknown unsigned integer subtype 0x{subtype:x}"
12✔
368
                )));
12✔
369
            }
370
        };
371
        Ok(length)
10,406,176✔
372
    }
10,406,200✔
373

374
    fn decode_uint<'py>(&mut self, py: Python<'py>, subtype: u8) -> PyResult<DecoderResult<'py>> {
3,252✔
375
        // Major tag 0
376
        let uint: u64 = self.decode_length_finite(py, subtype)?;
3,252✔
377
        Ok(Value(uint.into_bound_py_any(py)?))
3,240✔
378
    }
3,252✔
379

380
    fn decode_negint<'py>(&mut self, py: Python<'py>, subtype: u8) -> PyResult<DecoderResult<'py>> {
380✔
381
        // Major tag 1
382
        let uint: u64 = self.decode_length_finite(py, subtype)?;
380✔
383
        let signed_int = -(uint as i128) - 1;
380✔
384
        Ok(Value(signed_int.into_bound_py_any(py)?))
380✔
385
    }
380✔
386

387
    fn decode_bytestring<'py>(
1,248✔
388
        &mut self,
1,248✔
389
        py: Python<'py>,
1,248✔
390
        subtype: u8,
1,248✔
391
    ) -> PyResult<DecoderResult<'py>> {
1,248✔
392
        // Major tag 2
393
        match self.decode_length_as_usize(py, subtype)? {
1,248✔
394
            None => {
395
                // Indefinite length
396
                let sys_maxsize = *SYS_MAXSIZE.get(py).unwrap();
84✔
397
                let bytes = PyBytes::new_with_writer(py, 0, |writer| {
84✔
398
                    loop {
399
                        let (major_type, subtype) = self.read_major_and_subtype(py)?;
4,800,132✔
400
                        match (major_type, subtype) {
4,800,132✔
401
                            (2, _) => {
402
                                let length = self.decode_length_finite(py, subtype)?;
4,800,084✔
403
                                if length > sys_maxsize {
4,800,072✔
404
                                    return Err(CBORDecodeError::new_err(format!(
12✔
405
                                        "chunk too long in an indefinite bytestring chunk: {length}"
12✔
406
                                    )));
12✔
407
                                }
4,800,060✔
408
                                let length = length as usize;
4,800,060✔
409
                                let chunk = self.read(py, length)?;
4,800,060✔
410
                                writer.write_all(&chunk)?;
4,800,048✔
411
                            }
412
                            (7, 31) => break Ok(()), // break marker
24✔
413
                            _ => {
414
                                return Err(CBORDecodeError::new_err(format!(
24✔
415
                                    "non-byte string (major type {major_type}) found in indefinite \
24✔
416
                                    length byte string"
24✔
417
                                )));
24✔
418
                            }
419
                        }
420
                    }
421
                })?;
84✔
422
                Ok(Value(bytes.into_any()))
24✔
423
            }
424
            Some(length) if length <= 65536 => {
1,164✔
425
                let bytes = self.read(py, length)?;
1,140✔
426
                Ok(StringValue(PyBytes::new(py, &bytes).into_any(), length))
1,104✔
427
            }
428
            Some(length) => {
24✔
429
                // Incrementally read the bytestring, in chunks of 65536 bytes. The claimed
430
                // length is untrusted until the data has actually been read, so no more than
431
                // 64 KiB of it is reserved up front; a truncated payload claiming a huge length
432
                // can then force at most a 64 KiB allocation.
433
                let bytes = PyBytes::new_with_writer(py, length.min(65536), |writer| {
24✔
434
                    let mut remaining_length = length;
24✔
435
                    while remaining_length > 0 {
48✔
436
                        let chunk_size = remaining_length.min(65536);
36✔
437
                        let chunk = self.read(py, chunk_size)?;
36✔
438
                        remaining_length -= chunk_size;
24✔
439
                        writer.write_all(&chunk)?;
24✔
440
                    }
441
                    Ok(())
12✔
442
                })?;
24✔
443
                Ok(StringValue(bytes.into_any(), length))
12✔
444
            }
445
        }
446
    }
1,248✔
447

448
    fn decode_string<'py>(&mut self, py: Python<'py>, subtype: u8) -> PyResult<DecoderResult<'py>> {
788,984✔
449
        // Major tag 3
450
        match self.decode_length_as_usize(py, subtype)? {
788,984✔
451
            None => {
452
                // Indefinite length
453
                let mut parts: Vec<Bound<'py, PyString>> = Vec::new();
108✔
454
                loop {
455
                    let (major_type, subtype) = self.read_major_and_subtype(py)?;
4,800,180✔
456
                    let sys_maxsize = *SYS_MAXSIZE.get(py).unwrap();
4,800,180✔
457
                    match (major_type, subtype) {
4,800,180✔
458
                        (3, _) => {
459
                            let length = self.decode_length_finite(py, subtype)?;
4,800,120✔
460
                            if length > sys_maxsize {
4,800,108✔
461
                                return Err(CBORDecodeError::new_err(format!(
12✔
462
                                    "chunk too long in an indefinite text string chunk: {length}"
12✔
463
                                )));
12✔
464
                            }
4,800,096✔
465
                            let length = length as usize;
4,800,096✔
466
                            let bytes = self.read(py, length)?;
4,800,096✔
467
                            let decoded = match self.str_errors.as_ref() {
4,800,084✔
468
                                None => PyString::from_bytes(py, bytes.as_slice()),
4,800,084✔
469
                                Some(str_errors) => bytes
×
470
                                    .into_bound_py_any(py)?
×
471
                                    .call_method1(
×
UNCOV
472
                                        intern!(py, "decode"),
×
473
                                        (intern!(py, "utf-8"), str_errors),
×
474
                                    )
NEW
475
                                    .and_then(|string| string.cast_into().map_err(PyErr::from)),
×
476
                            }?;
12✔
477
                            parts.push(decoded);
4,800,072✔
478
                        }
479
                        (7, 31) => {
480
                            // break marker
481
                            break PyString::new(py, "")
36✔
482
                                .call_method1(intern!(py, "join"), (PyList::new(py, parts)?,))
36✔
483
                                .map(|joined| Value(joined.into_any()));
36✔
484
                        }
485
                        _ => {
486
                            return Err(CBORDecodeError::new_err(format!(
24✔
487
                                "non-text string (major type {major_type}) found in indefinite \
24✔
488
                                    length text string"
24✔
489
                            )));
24✔
490
                        }
491
                    }
492
                }
493
            }
494
            Some(length) if length <= 65536 => {
788,864✔
495
                let bytes = self.read(py, length)?;
788,804✔
496
                let decoded_string: Bound<'_, PyAny> = match self.str_errors.as_ref() {
788,756✔
497
                    None => PyString::from_bytes(py, bytes.as_slice())?.into_any(),
788,732✔
498
                    Some(str_errors) => bytes.into_bound_py_any(py)?.call_method1(
24✔
499
                        intern!(py, "decode"),
24✔
500
                        (intern!(py, "utf-8"), str_errors.bind(py)),
24✔
UNCOV
501
                    )?,
×
502
                };
503
                Ok(StringValue(decoded_string, length))
788,720✔
504
            }
505
            Some(length) => {
60✔
506
                // Incrementally decode the string, in chunks of 65536 bytes
507
                let decoder_class = INCREMENTAL_UTF8_DECODER
60✔
508
                    .get_or_try_init(py, || -> PyResult<Py<PyAny>> {
60✔
509
                        let decoder = py
12✔
510
                            .import("codecs")?
12✔
511
                            .getattr("lookup")?
12✔
512
                            .call1(("utf-8",))?
12✔
513
                            .getattr("incrementaldecoder")?;
12✔
514
                        Ok(decoder.unbind())
12✔
515
                    })?
12✔
516
                    .bind(py);
60✔
517
                let decoder = match self.str_errors.as_ref() {
60✔
518
                    None => decoder_class.call0()?,
36✔
519
                    Some(str_errors) => decoder_class.call1((str_errors,))?,
24✔
520
                };
521
                let mut parts: Vec<Bound<'py, PyAny>> = Vec::new();
60✔
522
                let mut remaining_length = length;
60✔
523
                while remaining_length > 0 {
204✔
524
                    let chunk_size = remaining_length.min(65536);
144✔
525
                    let chunk = self.read(py, chunk_size)?;
144✔
526
                    remaining_length -= chunk_size;
144✔
527
                    let is_final_chunk = remaining_length == 0;
144✔
528
                    let decoded_chunk =
144✔
529
                        decoder.call_method1(intern!(py, "decode"), (chunk, is_final_chunk))?;
144✔
530
                    parts.push(decoded_chunk);
144✔
531
                }
532
                let joined = PyString::new(py, "")
60✔
533
                    .call_method1(intern!(py, "join"), (PyList::new(py, parts)?,))?;
60✔
534
                Ok(StringValue(joined.into_any(), length))
60✔
535
            }
536
        }
537
    }
788,984✔
538

539
    fn decode_array<'py>(
7,148✔
540
        &mut self,
7,148✔
541
        py: Python<'py>,
7,148✔
542
        subtype: u8,
7,148✔
543
        immutable: bool,
7,148✔
544
    ) -> PyResult<DecoderResult<'py>> {
7,148✔
545
        // Major tag 4
546
        let optional_length = self.decode_length_as_usize(py, subtype)?;
7,148✔
547
        if immutable {
7,148✔
548
            let mut items: Vec<Bound<'py, PyAny>> = Vec::new();
1,016✔
549
            let callback: Box<DecoderCallback<'py>> = if let Some(length) = optional_length {
1,016✔
550
                if length == 0 {
992✔
551
                    return Ok(Value(PyTuple::empty(py).into_any()));
56✔
552
                }
936✔
553

554
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
1,964✔
555
                    items.push(item);
1,964✔
556
                    if items.len() == length {
1,964✔
557
                        Ok(CompleteFrame(
558
                            PyTuple::new(py, take(&mut items))?.into_any(),
888✔
559
                        ))
560
                    } else {
561
                        Ok(ContinueFrame(false))
1,076✔
562
                    }
563
                })
1,964✔
564
            } else {
565
                let break_marker = BREAK_MARKER.get(py).unwrap().bind(py);
24✔
566
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
72✔
567
                    if item.is(break_marker) {
72✔
568
                        Ok(CompleteFrame(
569
                            PyTuple::new(py, take(&mut items))?.into_any(),
12✔
570
                        ))
571
                    } else {
572
                        items.push(item);
60✔
573
                        Ok(ContinueFrame(false))
60✔
574
                    }
575
                })
72✔
576
            };
577
            Ok(BeginFrame(
960✔
578
                callback,
960✔
579
                false,
960✔
580
                None,
960✔
581
                DisplayName::String("array"),
960✔
582
            ))
960✔
583
        } else {
584
            let mut list = PyList::empty(py);
6,132✔
585
            let container = list.clone().into_any();
6,132✔
586
            let callback: Box<DecoderCallback<'py>> = if let Some(length) = optional_length {
6,132✔
587
                if length == 0 {
6,024✔
588
                    return Ok(Value(PyList::empty(py).into_any()));
132✔
589
                }
5,892✔
590

591
                Box::new(move |item, _immutable: bool| {
7,972✔
592
                    list.append(item)?;
7,972✔
593
                    if list.len() == length {
7,972✔
594
                        Ok(CompleteFrame(
924✔
595
                            replace(&mut list, PyList::empty(py)).into_any(),
924✔
596
                        ))
924✔
597
                    } else {
598
                        Ok(ContinueFrame(false))
7,048✔
599
                    }
600
                })
7,972✔
601
            } else {
602
                let break_marker = BREAK_MARKER.get(py).unwrap().bind(py);
108✔
603
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
787,044✔
604
                    if item.is(break_marker) {
787,044✔
605
                        Ok(CompleteFrame(
108✔
606
                            replace(&mut list, PyList::empty(py)).into_any(),
108✔
607
                        ))
108✔
608
                    } else {
609
                        list.append(item)?;
786,936✔
610
                        Ok(ContinueFrame(false))
786,936✔
611
                    }
612
                })
787,044✔
613
            };
614
            Ok(BeginFrame(
6,000✔
615
                callback,
6,000✔
616
                false,
6,000✔
617
                Some(container),
6,000✔
618
                DisplayName::String("array"),
6,000✔
619
            ))
6,000✔
620
        }
621
    }
7,148✔
622

623
    fn decode_map<'py>(
1,884✔
624
        &mut self,
1,884✔
625
        py: Python<'py>,
1,884✔
626
        subtype: u8,
1,884✔
627
        immutable: bool,
1,884✔
628
    ) -> PyResult<DecoderResult<'py>> {
1,884✔
629
        // Major tag 5
630

631
        #[cfg(Py_3_15)]
632
        fn create_frozen_dict<'py>(
12✔
633
            py: Python<'py>,
12✔
634
            items: Vec<(Bound<'py, PyAny>, Bound<'py, PyAny>)>,
12✔
635
        ) -> PyResult<Bound<'py, PyAny>> {
12✔
636
            FROZEN_DICT
12✔
637
                .get(py)?
12✔
638
                .call1((items,))?
12✔
639
                .cast_into()
12✔
640
                .map_err(|e| PyErr::from(e))
12✔
641
        }
12✔
642
        #[cfg(not(Py_3_15))]
643
        fn create_frozen_dict<'py>(
132✔
644
            py: Python<'py>,
132✔
645
            items: Vec<(Bound<'py, PyAny>, Bound<'py, PyAny>)>,
132✔
646
        ) -> PyResult<Bound<'py, PyAny>> {
132✔
647
            FrozenDict::from_items(py, items).map(|dict| dict.into_any())
132✔
648
        }
132✔
649

650
        #[inline]
651
        fn maybe_call_object_hook<'py>(
1,680✔
652
            py: Python<'py>,
1,680✔
653
            dict: Bound<'py, PyAny>,
1,680✔
654
            object_hook: Option<&Py<PyAny>>,
1,680✔
655
            immutable: bool,
1,680✔
656
        ) -> PyResult<Bound<'py, PyAny>> {
1,680✔
657
            if let Some(object_hook) = object_hook {
1,680✔
658
                object_hook.bind(py).call1((dict, immutable))
24✔
659
            } else {
660
                Ok(dict)
1,656✔
661
            }
662
        }
1,680✔
663

664
        let object_hook = self.object_hook.as_ref().map(|hook| hook.clone_ref(py));
1,884✔
665
        let allow_duplicate_keys = self.allow_duplicate_keys;
1,884✔
666
        let length_or_none = self.decode_length_as_usize(py, subtype)?;
1,884✔
667

668
        // Return immediately if this is an empty dict
669
        if let Some(length) = length_or_none
1,884✔
670
            && length == 0
1,824✔
671
        {
672
            let container: Bound<'py, PyAny> = if immutable {
420✔
UNCOV
673
                create_frozen_dict(py, Vec::new())?
×
674
            } else {
675
                PyDict::new(py).into_any()
420✔
676
            };
677
            let transformed =
420✔
678
                maybe_call_object_hook(py, container, object_hook.as_ref(), immutable)?;
420✔
679
            return Ok(Value(transformed));
420✔
680
        };
1,464✔
681

682
        let mut key: Option<Bound<'py, PyAny>> = None;
1,464✔
683
        if immutable {
1,464✔
684
            let seen_keys: Option<Bound<'py, PySet>> = if allow_duplicate_keys {
300✔
685
                None
276✔
686
            } else {
687
                Some(PySet::empty(py)?)
24✔
688
            };
689
            let check_duplicate = move |key: &Bound<'py, PyAny>| -> PyResult<()> {
300✔
690
                let seen = seen_keys.as_ref().unwrap();
48✔
691
                if seen.contains(key)? {
48✔
692
                    let repr = key.repr()?;
24✔
693
                    return Err(CBORDecodeError::new_err(format!(
24✔
694
                        "Duplicate map key: {}",
695
                        repr.to_str()?
24✔
696
                    )));
697
                }
24✔
698
                seen.add(key.clone())
24✔
699
            };
48✔
700

701
            let mut items: Vec<(Bound<'py, PyAny>, Bound<'py, PyAny>)> = Vec::new();
300✔
702
            let callback: Box<DecoderCallback<'py>> = if let Some(length) = length_or_none {
300✔
703
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
396✔
704
                    if let Some(key) = key.take() {
396✔
705
                        if !allow_duplicate_keys {
192✔
706
                            check_duplicate(&key)?;
24✔
707
                        }
168✔
708
                        items.push((key, item));
180✔
709
                        if items.len() == length {
180✔
710
                            let transformed = maybe_call_object_hook(
144✔
711
                                py,
144✔
712
                                create_frozen_dict(py, take(&mut items))?,
144✔
713
                                object_hook.as_ref(),
144✔
714
                                immutable,
144✔
UNCOV
715
                            )?;
×
716
                            return Ok(CompleteFrame(transformed));
144✔
717
                        }
36✔
718
                        Ok(ContinueFrame(true))
36✔
719
                    } else {
720
                        key = Some(item);
204✔
721
                        Ok(ContinueFrame(false))
204✔
722
                    }
723
                })
396✔
724
            } else {
725
                let break_marker = BREAK_MARKER.get(py).unwrap().bind(py);
12✔
726
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
48✔
727
                    if item.is(break_marker) {
48✔
UNCOV
728
                        let container = create_frozen_dict(py, take(&mut items))?;
×
UNCOV
729
                        let transformed = maybe_call_object_hook(
×
UNCOV
730
                            py,
×
UNCOV
731
                            container.into_any(),
×
732
                            object_hook.as_ref(),
×
733
                            immutable,
×
734
                        )?;
×
735
                        Ok(CompleteFrame(transformed))
×
736
                    } else if let Some(key) = key.take() {
48✔
737
                        if !allow_duplicate_keys {
24✔
738
                            check_duplicate(&key)?;
24✔
739
                        }
×
740
                        items.push((key, item));
12✔
741
                        Ok(ContinueFrame(true))
12✔
742
                    } else {
743
                        key = Some(item);
24✔
744
                        Ok(ContinueFrame(false))
24✔
745
                    }
746
                })
48✔
747
            };
748
            Ok(BeginFrame(callback, true, None, DisplayName::String("map")))
300✔
749
        } else {
750
            fn check_duplicate(key: &Bound<PyAny>, dict: &Bound<PyDict>) -> PyResult<()> {
48✔
751
                if dict.contains(key)? {
48✔
752
                    let repr = key.repr()?;
24✔
753
                    return Err(CBORDecodeError::new_err(format!(
24✔
754
                        "Duplicate map key: {}",
755
                        repr.to_str()?
24✔
756
                    )));
757
                }
24✔
758
                Ok(())
24✔
759
            }
48✔
760

761
            let mut dict = PyDict::new(py);
1,164✔
762
            let container = dict.clone().into_any();
1,164✔
763
            let callback: Box<DecoderCallback<'py>> = if let Some(length) = length_or_none {
1,164✔
764
                let mut count = 0usize;
1,116✔
765
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
3,544✔
766
                    if let Some(key) = key.take() {
3,544✔
767
                        if !allow_duplicate_keys {
1,772✔
768
                            check_duplicate(&key, &dict)?;
24✔
769
                        }
1,748✔
770
                        dict.set_item(&key, item)?;
1,760✔
771
                        count += 1;
1,760✔
772
                        if count == length {
1,760✔
773
                            let dict = replace(&mut dict, PyDict::new(py));
1,080✔
774
                            let transformed = maybe_call_object_hook(
1,080✔
775
                                py,
1,080✔
776
                                dict.into_any(),
1,080✔
777
                                object_hook.as_ref(),
1,080✔
778
                                immutable,
1,080✔
779
                            )?;
12✔
780
                            return Ok(CompleteFrame(transformed));
1,068✔
781
                        }
680✔
782
                        Ok(ContinueFrame(true))
680✔
783
                    } else {
784
                        key = Some(item);
1,772✔
785
                        Ok(ContinueFrame(false))
1,772✔
786
                    }
787
                })
3,544✔
788
            } else {
789
                let break_marker = BREAK_MARKER.get(py).unwrap().bind(py);
48✔
790
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
204✔
791
                    if item.is(break_marker) {
204✔
792
                        let dict = replace(&mut dict, PyDict::new(py));
36✔
793
                        let transformed = maybe_call_object_hook(
36✔
794
                            py,
36✔
795
                            dict.into_any(),
36✔
796
                            object_hook.as_ref(),
36✔
797
                            immutable,
36✔
UNCOV
798
                        )?;
×
799
                        Ok(CompleteFrame(transformed))
36✔
800
                    } else if let Some(key) = key.take() {
168✔
801
                        if !allow_duplicate_keys {
84✔
802
                            check_duplicate(&key, &dict)?;
24✔
803
                        }
60✔
804
                        dict.set_item(&key, item)?;
72✔
805
                        Ok(ContinueFrame(true))
72✔
806
                    } else {
807
                        key = Some(item);
84✔
808
                        Ok(ContinueFrame(false))
84✔
809
                    }
810
                })
204✔
811
            };
812
            Ok(BeginFrame(
1,164✔
813
                callback,
1,164✔
814
                true,
1,164✔
815
                Some(container),
1,164✔
816
                DisplayName::String("map"),
1,164✔
817
            ))
1,164✔
818
        }
819
    }
1,884✔
820

821
    fn decode_semantic<'py>(
3,100✔
822
        &mut self,
3,100✔
823
        py: Python<'py>,
3,100✔
824
        subtype: u8,
3,100✔
825
        immutable: bool,
3,100✔
826
    ) -> PyResult<DecoderResult<'py>> {
3,100✔
827
        let tagnum = self.decode_length_finite(py, subtype)?;
3,100✔
828
        if let Some(semantic_decoders) = &self.semantic_decoders {
3,100✔
829
            match semantic_decoders.bind(py).get_item(tagnum) {
120✔
830
                Ok(decoder) => {
96✔
831
                    let name = decoder.getattr_opt(intern!(py, NAME_ATTR))?;
96✔
832

833
                    // If these attributes are present, this callable was decorated with
834
                    // @shareable_decoder
835
                    return if let Some(name) = name {
96✔
836
                        let require_immutable: bool = decoder
60✔
837
                            .getattr_opt(intern!(py, IMMUTABLE_ATTR))?
60✔
838
                            .map(|x| x.is_truthy())
60✔
839
                            .transpose()?
60✔
840
                            .unwrap_or(false);
60✔
841
                        let retval = decoder.call1((immutable,))?;
60✔
842
                        let tuple: Bound<'_, PyTuple> = retval.cast_into()?;
60✔
843
                        if tuple.len() != 2 {
60✔
UNCOV
844
                            return Err(CBORDecodeError::new_err(format!(
×
UNCOV
845
                                "{decoder} returned a tuple of {} items, expected 2",
×
UNCOV
846
                                tuple.len()
×
UNCOV
847
                            )));
×
848
                        }
60✔
849
                        let container: Bound<'_, PyAny> = tuple.get_item(0)?.cast_into()?;
60✔
850
                        let callback: Bound<'_, PyAny> = tuple.get_item(1)?.cast_into()?;
60✔
851
                        Ok(BeginFrame(
852
                            Box::new(
60✔
853
                                move |item, _immutable: bool| -> PyResult<DecoderResult<'py>> {
60✔
854
                                    callback.call1((item,)).map(CompleteFrame)
60✔
855
                                },
60✔
856
                            ),
857
                            require_immutable,
60✔
858
                            if container.is_none() {
60✔
859
                                None
48✔
860
                            } else {
861
                                Some(container)
12✔
862
                            },
863
                            if name.is_none() {
60✔
864
                                DisplayName::SemanticTag(tagnum)
12✔
865
                            } else {
866
                                DisplayName::PythonName(name.clone())
48✔
867
                            },
868
                        ))
869
                    } else {
870
                        let callback =
36✔
871
                            move |item, new_immutable: bool| -> PyResult<DecoderResult<'py>> {
36✔
872
                                decoder.call1((item, new_immutable)).map(CompleteFrame)
36✔
873
                            };
36✔
874
                        Ok(BeginFrame(
36✔
875
                            Box::new(callback),
36✔
876
                            immutable,
36✔
877
                            None,
36✔
878
                            DisplayName::SemanticTag(tagnum),
36✔
879
                        ))
36✔
880
                    };
881
                }
882
                Err(e) if e.is_instance_of::<PyLookupError>(py) => {}
24✔
UNCOV
883
                Err(e) => return Err(e),
×
884
            }
885
        };
2,980✔
886

887
        // No semantic decoder lookup map – fall back to the hard coded switchboard
888
        let (callback, typename): (Box<DecoderCallback<'py>>, &str) = match tagnum {
3,004✔
889
            0 => (
604✔
890
                Box::new(Self::decode_datetime_string),
604✔
891
                "string-form datetime",
604✔
892
            ),
604✔
893
            1 => (Box::new(Self::decode_epoch_datetime), "epoch-form datetime"),
60✔
894
            2 => (Box::new(Self::decode_positive_bignum), "positive bignum"),
84✔
895
            3 => (Box::new(Self::decode_negative_bignum), "negative bignum"),
36✔
896
            4 => (Box::new(Self::decode_fraction), "decimal fraction"),
112✔
897
            5 => (Box::new(Self::decode_bigfloat), "bigfloat"),
24✔
898
            25 => (Box::new(Self::decode_stringref), "string reference"),
120✔
899
            28 => return Ok(Shareable),
240✔
900
            29 => (Box::new(Self::decode_sharedref), "shared reference"),
180✔
901
            30 => (Box::new(Self::decode_rational), "rational"),
100✔
902
            35 => (Box::new(Self::decode_regexp), "regular expression"),
36✔
903
            36 => (Box::new(Self::decode_mime), "MIME message"),
36✔
904
            37 => (Box::new(Self::decode_uuid), "UUID"),
200✔
905
            52 => (Box::new(Self::decode_ipv4), "IPv4 address"),
172✔
906
            54 => (Box::new(Self::decode_ipv6), "IPv6 address"),
108✔
907
            100 => (Box::new(Self::decode_epoch_date), "epoch-form date"),
12✔
908
            256 => return Ok(StringNamespace),
60✔
909
            258 => return self.decode_set(py, immutable),
268✔
910
            260 => (Box::new(Self::decode_ipaddress), "IP address"),
84✔
911
            261 => (Box::new(Self::decode_ipnetwork), "IP network"),
84✔
912
            1004 => (Box::new(Self::decode_date_string), "string-form date"),
12✔
913
            43000 => (Box::new(Self::decode_complex), "complex number"),
252✔
914
            55799 => (
24✔
915
                Box::new(Self::decode_self_describe_cbor),
24✔
916
                "self-described CBOR value",
24✔
917
            ),
24✔
918
            _ => {
919
                // For a tag with no designated decoder, check if we have a tag hook, and call
920
                // that with the tag object, using its return value as the decoded value.
921
                let tag = CBORTag::new(tagnum.into_bound_py_any(py)?, py.None().into_bound(py))?;
96✔
922
                let bound_tag = Bound::new(py, tag)?.into_any();
96✔
923
                let container = bound_tag.clone();
96✔
924
                let mut tag_hook = self
96✔
925
                    .tag_hook
96✔
926
                    .as_ref()
96✔
927
                    .map(|hook| hook.clone_ref(py).into_bound(py));
96✔
928
                let callback = Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
96✔
929
                    let tag: &Bound<'py, CBORTag> = bound_tag.cast()?;
84✔
930
                    tag.borrow_mut().value = item.unbind();
84✔
931
                    if let Some(tag_hook) = tag_hook.take() {
84✔
932
                        tag_hook.call1((&bound_tag, immutable)).map(CompleteFrame)
60✔
933
                    } else {
934
                        Ok(CompleteFrame(bound_tag.clone()))
24✔
935
                    }
936
                });
84✔
937
                return Ok(BeginFrame(
96✔
938
                    callback,
96✔
939
                    true,
96✔
940
                    Some(container),
96✔
941
                    DisplayName::SemanticTag(tagnum),
96✔
942
                ));
96✔
943
            }
944
        };
945
        Ok(BeginFrame(
2,340✔
946
            callback,
2,340✔
947
            true,
2,340✔
948
            None,
2,340✔
949
            DisplayName::String(typename),
2,340✔
950
        ))
2,340✔
951
    }
3,100✔
952

953
    fn decode_special<'py>(
7,984✔
954
        &mut self,
7,984✔
955
        py: Python<'py>,
7,984✔
956
        subtype: u8,
7,984✔
957
    ) -> PyResult<DecoderResult<'py>> {
7,984✔
958
        // Major tag 7
959
        match subtype {
7,984✔
960
            0..20 => {
7,984✔
961
                let value = subtype.into_pyobject(py)?;
72✔
962
                CBORSimpleValue::new(value)?.into_bound_py_any(py)
72✔
963
            }
964
            20 => Ok(false.into_bound_py_any(py)?),
128✔
965
            21 => Ok(true.into_bound_py_any(py)?),
180✔
966
            22 => Ok(py.None().into_bound_py_any(py)?),
580✔
967
            23 => Ok(UNDEFINED.get(py).unwrap().into_bound_py_any(py)?),
24✔
968
            24 => {
969
                let value = self.read_exact::<1>(py)?[0];
84✔
970
                if value < 0x20 {
84✔
971
                    return Err(CBORDecodeError::new_err(
36✔
972
                        "invalid two-byte sequence for simple value",
36✔
973
                    ));
36✔
974
                }
48✔
975
                CBORSimpleValue::new(value.into_pyobject(py)?)?.into_bound_py_any(py)
48✔
976
            }
977
            25 => {
978
                let bytes = self.read_exact::<2>(py)?;
740✔
979
                f16::from_be_bytes(bytes).to_f32().into_bound_py_any(py)
740✔
980
            }
981
            26 => {
982
                let bytes = self.read_exact::<4>(py)?;
108✔
983
                f32::from_be_bytes(bytes).into_bound_py_any(py)
108✔
984
            }
985
            27 => {
986
                let bytes = self.read_exact::<8>(py)?;
5,864✔
987
                f64::from_be_bytes(bytes).into_bound_py_any(py)
5,864✔
988
            }
989
            31 => Ok(BREAK_MARKER.get(py).unwrap().into_bound_py_any(py)?),
168✔
990
            _ => Err(CBORDecodeError::new_err(format!(
36✔
991
                "undefined reserved major type 7 subtype 0x{subtype:x}"
36✔
992
            ))),
36✔
993
        }
994
        .map(Value)
7,948✔
995
    }
7,984✔
996

997
    //
998
    // Decoders for semantic tags (major tag 6)
999
    //
1000

1001
    fn decode_datetime_string<'py>(
604✔
1002
        value: Bound<'py, PyAny>,
604✔
1003
        _immutable: bool,
604✔
1004
    ) -> PyResult<DecoderResult<'py>> {
604✔
1005
        // Semantic tag 0
1006
        let py = value.py();
604✔
1007
        let value_type = value.get_type();
604✔
1008
        let mut datetime_str: Bound<'py, PyString> = value.cast_into().map_err(|e| {
604✔
UNCOV
1009
            create_exc_from(
×
UNCOV
1010
                py,
×
UNCOV
1011
                CBORDecodeError::new_err(format!(
×
1012
                    "expected string for tag, got {} instead",
1013
                    value_type
1014
                )),
1015
                Some(PyErr::from(e)),
×
1016
            )
UNCOV
1017
        })?;
×
1018

1019
        // Python 3.10 has impaired parsing of the ISO format:
1020
        // * It doesn't handle the standard "Z" suffix
1021
        // * It doesn't handle the fractional seconds part having fewer than 6 digits
1022
        if py.version_info() <= (3, 10) {
604✔
1023
            // Convert Z to +00:00
1024
            let mut temp_str = datetime_str.to_string().replacen("Z", "+00:00", 1);
134✔
1025

1026
            // Pad any microseconds part with zeros
1027
            if let Some((first, second)) = temp_str.split_once('.')
134✔
1028
                && let Some(index) = second.find(|c: char| !c.is_numeric())
681✔
1029
            {
1030
                let (mut micros, tz_part) = second.split_at(index);
93✔
1031
                // Cut off excess zeroes from the start of the microseconds part
1032
                if micros.len() >= 6 {
93✔
1033
                    micros = &micros[..6];
78✔
1034
                }
78✔
1035

1036
                // Reconstitute the datetime string, right-padding the microseconds part
1037
                // with zeroes
1038
                temp_str = format!("{first}.{micros:0<6}{tz_part}");
93✔
1039
            }
41✔
1040

1041
            datetime_str = temp_str.into_pyobject(py)?;
134✔
1042
        }
470✔
1043

1044
        DATETIME_FROMISOFORMAT
604✔
1045
            .get(py)?
604✔
1046
            .call1((&datetime_str,))
604✔
1047
            .map(CompleteFrame)
604✔
1048
    }
604✔
1049

1050
    fn decode_epoch_datetime(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
60✔
1051
        // Semantic tag 1
1052
        let py = value.py();
60✔
1053
        let utc = UTC.get(py)?;
60✔
1054
        DATETIME_FROMTIMESTAMP
60✔
1055
            .get(py)?
60✔
1056
            .call1((value, utc))
60✔
1057
            .map(CompleteFrame)
60✔
1058
    }
60✔
1059

1060
    fn decode_positive_bignum(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
60✔
1061
        // Semantic tag 2
1062
        let py = value.py();
60✔
1063
        INT_FROMBYTES
60✔
1064
            .get(py)?
60✔
1065
            .call1((value, intern!(py, "big")))
60✔
1066
            .map(CompleteFrame)
60✔
1067
    }
60✔
1068

1069
    fn decode_negative_bignum(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
36✔
1070
        // Semantic tag 3
1071
        let py = value.py();
36✔
1072
        let int = INT_FROMBYTES.get(py)?.call1((value, intern!(py, "big")))?;
36✔
1073
        int.neg()?.add(-1).map(CompleteFrame)
36✔
1074
    }
36✔
1075

1076
    fn decode_fraction(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
112✔
1077
        // Semantic tag 4
1078
        let py = value.py();
112✔
1079
        let tuple = require_tuple(value, 2)?;
112✔
1080
        let decimal_class = DECIMAL_TYPE.get(py)?;
100✔
1081
        {
1082
            let exp = tuple.get_item(0)?;
100✔
1083
            let sig_tuple = decimal_class
100✔
1084
                .call1((tuple.get_item(1)?,))?
100✔
1085
                .call_method0(intern!(py, "as_tuple"))?
100✔
1086
                .cast_into::<PyTuple>()?;
100✔
1087
            let sign = sig_tuple.get_item(0)?;
100✔
1088
            let digits = sig_tuple.get_item(1)?;
100✔
1089
            let args_tuple = PyTuple::new(py, [sign, digits, exp])?;
100✔
1090
            decimal_class.call1((args_tuple,)).map(CompleteFrame)
100✔
1091
        }
1092
    }
112✔
1093

1094
    fn decode_bigfloat(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
24✔
1095
        // Semantic tag 5
1096
        let py = value.py();
24✔
1097
        let tuple = require_tuple(value, 2)?;
24✔
1098
        let decimal_class = DECIMAL_TYPE.get(py)?;
12✔
1099
        {
1100
            let exp = decimal_class.call1((tuple.get_item(0)?,))?;
12✔
1101
            let sig = decimal_class.call1((tuple.get_item(1)?,))?;
12✔
1102
            let exp = PyInt::new(py, 2).pow(exp, py.None())?;
12✔
1103
            sig.mul(exp).map(CompleteFrame)
12✔
1104
        }
1105
    }
24✔
1106

1107
    fn decode_stringref(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
120✔
1108
        // Semantic tag 25
1109
        let index: usize = value.extract()?;
120✔
1110
        Ok(StringReference(index))
120✔
1111
    }
120✔
1112

1113
    fn decode_sharedref(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
180✔
1114
        // Semantic tag 29
1115
        let index: usize = value.extract()?;
180✔
1116
        Ok(SharedReference(index))
180✔
1117
    }
180✔
1118

1119
    fn decode_rational(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
100✔
1120
        // Semantic tag 30
1121
        let py = value.py();
100✔
1122
        let tuple = require_tuple(value, 2)?;
100✔
1123
        FRACTION_TYPE.get(py)?.call1(tuple).map(CompleteFrame)
88✔
1124
    }
100✔
1125

1126
    fn decode_regexp(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
36✔
1127
        // Semantic tag 35
1128
        RE_COMPILE
36✔
1129
            .get(value.py())?
36✔
1130
            .call1((value,))
36✔
1131
            .map(CompleteFrame)
36✔
1132
    }
36✔
1133

1134
    fn decode_mime(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
24✔
1135
        // Semantic tag 36
1136
        let py = value.py();
24✔
1137
        let parser = EMAIL_PARSER.get(py)?.call0()?;
24✔
1138
        parser
24✔
1139
            .call_method1(intern!(py, "parsestr"), (value,))
24✔
1140
            .map(CompleteFrame)
24✔
1141
    }
24✔
1142

1143
    fn decode_uuid(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
200✔
1144
        // Semantic tag 37
1145
        let py = value.py();
200✔
1146
        let kwargs = PyDict::new(py);
200✔
1147
        kwargs.set_item(intern!(py, "bytes"), value)?;
200✔
1148
        UUID_TYPE
200✔
1149
            .get(py)?
200✔
1150
            .call((), Some(&kwargs))
200✔
1151
            .map(CompleteFrame)
200✔
1152
    }
200✔
1153

1154
    fn decode_ipv4(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
172✔
1155
        // Semantic tag 52
1156
        let py = value.py();
172✔
1157
        let addr = if let Ok(bytes) = value.cast::<PyBytes>() {
172✔
1158
            // The decoded value was a bytestring, so this is an IPv4 address
1159
            IPV4ADDRESS_TYPE.get(py)?.call1((bytes,))?
136✔
1160
        } else if let Ok(tuple) = value.cast_into::<PyTuple>()
36✔
1161
            && tuple.len() == 2
36✔
1162
        {
1163
            // The decoded value was a 2-item array. Check the types of the elements:
1164
            // (int, bytes) -> network
1165
            // (bytes, int) -> interface
1166
            let first_item = tuple.get_item(0)?;
36✔
1167
            let second_item = tuple.get_item(1)?;
36✔
1168
            if let Ok(prefix) = first_item.cast::<PyInt>()
36✔
1169
                && let Ok(address) = second_item.cast::<PyBytes>()
24✔
1170
            {
1171
                let mut address_vec: Vec<u8> = address.extract()?;
24✔
1172
                if address_vec.len() > 4 {
24✔
1173
                    return Err(CBORDecodeError::new_err(format!(
12✔
1174
                        "address byte string for IPv4 network is too long ({} bytes)",
12✔
1175
                        address_vec.len()
12✔
1176
                    )));
12✔
1177
                }
12✔
1178
                address_vec.resize(4, 0);
12✔
1179
                IPV4NETWORK_TYPE.get(py)?.call1(((address_vec, prefix),))?
12✔
1180
            } else if let Ok(address) = first_item.cast::<PyBytes>()
12✔
1181
                && let Ok(prefix) = second_item.cast::<PyInt>()
12✔
1182
            {
1183
                IPV4INTERFACE_TYPE.get(py)?.call1(((address, prefix),))?
12✔
1184
            } else {
UNCOV
1185
                return Err(CBORDecodeError::new_err("invalid types in input array"));
×
1186
            }
1187
        } else {
UNCOV
1188
            return Err(CBORDecodeError::new_err(
×
1189
                "input value must be a bytestring or an array of 2 elements",
×
UNCOV
1190
            ));
×
1191
        };
1192
        Ok(CompleteFrame(addr))
160✔
1193
    }
172✔
1194

1195
    fn decode_ipv6(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
108✔
1196
        // Semantic tag 54
1197
        let py = value.py();
108✔
1198
        let ipv6addr_class = IPV6ADDRESS_TYPE.get(py)?;
108✔
1199
        let addr = if let Ok(bytes) = value.cast::<PyBytes>() {
108✔
1200
            // The decoded value was a bytestring, so this is an IPv6 address
1201
            ipv6addr_class.call1((bytes,))?
60✔
1202
        } else if let Ok(tuple) = value.cast_into::<PyTuple>()
48✔
1203
            && (2..=3).contains(&tuple.len())
48✔
1204
        {
1205
            // The decoded value was a 2-item (or 3 with zone ID) array.
1206
            // Check the types of the elements:
1207
            // (int, bytes) -> network
1208
            // (bytes, int) -> interface
1209
            let first_item = tuple.get_item(0)?;
48✔
1210
            let second_item = tuple.get_item(1)?;
48✔
1211
            let zone_id = tuple.get_item(2).ok();
48✔
1212
            let (class, addr_bytes, prefix) = if let Ok(prefix) = first_item.cast::<PyInt>()
48✔
1213
                && let Ok(address) = second_item.cast::<PyBytes>()
24✔
1214
            {
1215
                let mut address_vec: Vec<u8> = address.extract()?;
24✔
1216
                if address_vec.len() > 16 {
24✔
1217
                    return Err(CBORDecodeError::new_err(format!(
12✔
1218
                        "address byte string for IPv6 network is too long ({} bytes)",
12✔
1219
                        address_vec.len()
12✔
1220
                    )));
12✔
1221
                }
12✔
1222
                address_vec.resize(16, 0);
12✔
1223
                Ok((
1224
                    IPV6NETWORK_TYPE.get(py)?,
12✔
1225
                    PyBytes::new(py, address_vec.as_slice()),
12✔
1226
                    prefix,
12✔
1227
                ))
1228
            } else if let Ok(address) = first_item.cast_into::<PyBytes>()
24✔
1229
                && let Ok(prefix) = second_item.cast::<PyInt>()
24✔
1230
            {
1231
                Ok((IPV6INTERFACE_TYPE.get(py)?, address, prefix))
24✔
1232
            } else {
UNCOV
1233
                Err(CBORDecodeError::new_err("invalid types in input array"))
×
UNCOV
1234
            }?;
×
1235
            let addr_obj = ipv6addr_class.call1((addr_bytes,))?;
36✔
1236

1237
            // Format the zone ID suffix if a zone ID was included
1238
            // (bytes or integer as the last item of a 3-tuple)
1239
            let zone_id_suffix = if let Some(zone_id) = zone_id {
36✔
1240
                if let Ok(zone_id_bytes) = zone_id.cast::<PyBytes>() {
24✔
1241
                    let zone_id_str = String::from_utf8(zone_id_bytes.as_bytes().to_vec())?;
12✔
1242
                    format!("%{zone_id_str}")
12✔
1243
                } else if let Ok(zone_id_int) = zone_id.cast::<PyInt>() {
12✔
1244
                    format!("%{zone_id_int}")
12✔
1245
                } else {
UNCOV
1246
                    return Err(CBORDecodeError::new_err(
×
UNCOV
1247
                        "zone ID must be an integer or a bytestring",
×
UNCOV
1248
                    ));
×
1249
                }
1250
            } else {
1251
                String::default()
12✔
1252
            };
1253

1254
            let formatted_addr = format!("{addr_obj}{zone_id_suffix}/{prefix}");
36✔
1255
            class.call1((formatted_addr,))?
36✔
1256
        } else {
UNCOV
1257
            return Err(CBORDecodeError::new_err(
×
UNCOV
1258
                "input value must be a bytestring or an array of 2 elements",
×
UNCOV
1259
            ));
×
1260
        };
1261
        Ok(CompleteFrame(addr))
96✔
1262
    }
108✔
1263

1264
    fn decode_epoch_date(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
12✔
1265
        // Semantic tag 100
1266
        let py = value.py();
12✔
1267
        let value = value.extract::<i32>()? + 719163;
12✔
1268
        DATE_FROMORDINAL.get(py)?.call1((value,)).map(CompleteFrame)
12✔
1269
    }
12✔
1270

1271
    fn decode_ipaddress(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
84✔
1272
        // Semantic tag 260 (deprecated)
1273
        let py = value.py();
84✔
1274
        let value = value.cast_into::<PyBytes>()?;
84✔
1275
        let addr_obj = match value.len()? {
72✔
1276
            4 | 16 => IPADDRESS_FUNC.get(py)?.call1((value,)),
48✔
1277
            6 => Ok(Bound::new(py, CBORTag::new_internal(260, value.into_any()))?.into_any()), // MAC address
12✔
1278
            length => Err(CBORDecodeError::new_err(format!(
12✔
1279
                "invalid IP address length ({length})"
12✔
1280
            ))),
12✔
1281
        }?;
12✔
1282
        Ok(CompleteFrame(addr_obj))
60✔
1283
    }
84✔
1284

1285
    fn decode_ipnetwork<'py>(
84✔
1286
        value: Bound<'py, PyAny>,
84✔
1287
        _immutable: bool,
84✔
1288
    ) -> PyResult<DecoderResult<'py>> {
84✔
1289
        // Semantic tag 261 (deprecated)
1290
        let py = value.py();
84✔
1291
        let value: Bound<'py, PyMapping> = value.cast_into()?;
84✔
1292
        let length = value.len()?;
84✔
1293
        if length != 1 {
84✔
1294
            return Err(CBORDecodeError::new_err(format!(
12✔
1295
                "invalid input map length for IP network: {}",
12✔
1296
                length
12✔
1297
            )));
12✔
1298
        }
72✔
1299
        let first_item = value.items()?.get_item(0)?;
72✔
1300
        let mask_length = first_item.get_item(1)?;
72✔
1301
        if !mask_length.is_exact_instance_of::<PyInt>() {
72✔
1302
            return Err(CBORDecodeError::new_err(format!(
12✔
1303
                "invalid mask length for IP network: {mask_length}"
12✔
1304
            )));
12✔
1305
        }
60✔
1306

1307
        let addr_obj = match IPNETWORK_FUNC.get(py)?.call1((&first_item,)) {
60✔
1308
            Ok(ip_network) => Ok(ip_network),
48✔
1309
            Err(e) => {
12✔
1310
                // A CompleteFrameError may indicate that the bytestring has host bits set, so try parsing
1311
                // it as an IP interface instead
1312
                if e.is_instance_of::<PyValueError>(py) {
12✔
1313
                    IPINTERFACE_FUNC.get(py)?.call1((first_item,))
12✔
1314
                } else {
UNCOV
1315
                    Err(e)
×
1316
                }
1317
            }
UNCOV
1318
        }?;
×
1319
        Ok(CompleteFrame(addr_obj))
60✔
1320
    }
84✔
1321

1322
    fn decode_date_string(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
12✔
1323
        // Semantic tag 1004
1324
        let py = value.py();
12✔
1325
        let date = DATE_FROMISOFORMAT.get(py)?.call1((value,))?;
12✔
1326
        Ok(CompleteFrame(date))
12✔
1327
    }
12✔
1328

1329
    fn decode_complex(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
252✔
1330
        // Semantic tag 43000
1331
        let py = value.py();
252✔
1332
        let tuple = require_tuple(value, 2)?;
252✔
1333
        let real: f64 = tuple.get_item(0)?.extract()?;
252✔
1334
        let imag: f64 = tuple.get_item(1)?.extract()?;
252✔
1335
        Ok(CompleteFrame(
252✔
1336
            PyComplex::from_doubles(py, real, imag).into_any(),
252✔
1337
        ))
252✔
1338
    }
252✔
1339

1340
    fn decode_self_describe_cbor(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
24✔
1341
        // Semantic tag 55799
1342
        Ok(CompleteFrame(value))
24✔
1343
    }
24✔
1344

1345
    fn decode_set<'py>(
268✔
1346
        &mut self,
268✔
1347
        py: Python<'py>,
268✔
1348
        immutable: bool,
268✔
1349
    ) -> PyResult<DecoderResult<'py>> {
268✔
1350
        // Semantic tag 258
1351
        let mut set_or_none = if immutable {
268✔
1352
            None
36✔
1353
        } else {
1354
            Some(PySet::empty(py)?.into_any())
232✔
1355
        };
1356
        let container = set_or_none.clone();
268✔
1357
        let callback = move |item: Bound<'py, PyAny>, _immutable: bool| {
268✔
1358
            let container: Bound<'py, PyAny> = if let Some(set) = set_or_none.take() {
256✔
1359
                set.call_method1(intern!(py, "update"), (item,))?;
220✔
1360
                set.into_any()
220✔
1361
            } else {
1362
                let tuple = item.cast_into::<PyTuple>()?;
36✔
1363
                PyFrozenSet::new(py, tuple)?.into_any()
36✔
1364
            };
1365
            Ok(CompleteFrame(container))
256✔
1366
        };
256✔
1367
        Ok(BeginFrame(
268✔
1368
            Box::new(callback),
268✔
1369
            true,
268✔
1370
            container,
268✔
1371
            DisplayName::String("set"),
268✔
1372
        ))
268✔
1373
    }
268✔
1374
}
1375

UNCOV
1376
#[pymethods]
×
1377
impl CBORDecoder {
1378
    #[new]
1379
    #[pyo3(signature = (
1380
        fp,
1381
        *,
1382
        tag_hook = None,
1383
        object_hook = None,
1384
        semantic_decoders = None,
1385
        str_errors = "strict",
1386
        read_size = 4096,
1387
        max_depth = 400,
1388
        allow_indefinite = true,
1389
        allow_duplicate_keys = true,
1390
    ))]
1391
    pub fn new(
444✔
1392
        py: Python<'_>,
444✔
1393
        fp: &Bound<'_, PyAny>,
444✔
1394
        tag_hook: Option<&Bound<'_, PyAny>>,
444✔
1395
        object_hook: Option<&Bound<'_, PyAny>>,
444✔
1396
        semantic_decoders: Option<&Bound<'_, PyMapping>>,
444✔
1397
        str_errors: &str,
444✔
1398
        read_size: usize,
444✔
1399
        max_depth: usize,
444✔
1400
        allow_indefinite: bool,
444✔
1401
        allow_duplicate_keys: bool,
444✔
1402
    ) -> PyResult<Self> {
444✔
1403
        Self::new_internal(
444✔
1404
            py,
444✔
1405
            Some(fp),
444✔
1406
            None,
444✔
1407
            tag_hook,
444✔
1408
            object_hook,
444✔
1409
            semantic_decoders,
444✔
1410
            str_errors,
444✔
1411
            read_size,
444✔
1412
            max_depth,
444✔
1413
            allow_indefinite,
444✔
1414
            allow_duplicate_keys,
444✔
1415
        )
1416
    }
444✔
1417

1418
    #[getter]
UNCOV
1419
    fn fp(&self, py: Python<'_>) -> Option<Py<PyAny>> {
×
UNCOV
1420
        self.fp.as_ref().map(|fp| fp.clone_ref(py))
×
UNCOV
1421
    }
×
1422

1423
    #[setter]
1424
    fn set_fp(&mut self, fp: &Bound<'_, PyAny>) -> PyResult<()> {
456✔
1425
        let result = fp.call_method0("readable");
456✔
1426
        if let Ok(readable) = &result
456✔
1427
            && readable.is_truthy()?
444✔
1428
        {
1429
            self.fp_is_seekable = fp.call_method0("seekable")?.is_truthy()?;
432✔
1430
            let fp = fp.clone();
432✔
1431
            self.read_method = Some(fp.getattr("read")?.unbind());
432✔
1432
            self.fp = Some(fp.unbind());
432✔
1433
            self.available_bytes = 0;
432✔
1434
            self.read_position = 0;
432✔
1435
            self.buffer = None;
432✔
1436
            Ok(())
432✔
1437
        } else {
1438
            raise_exc_from(
24✔
1439
                fp.py(),
24✔
1440
                PyValueError::new_err("fp must be a readable file-like object"),
24✔
1441
                result.err(),
24✔
1442
            )
1443
        }
1444
    }
456✔
1445

1446
    #[getter]
1447
    fn tag_hook(&self, py: Python<'_>) -> Option<Py<PyAny>> {
12✔
1448
        self.tag_hook
12✔
1449
            .as_ref()
12✔
1450
            .map(|tag_hook| tag_hook.clone_ref(py))
12✔
1451
    }
12✔
1452

1453
    #[setter]
1454
    fn set_tag_hook(&mut self, tag_hook: Option<&Bound<'_, PyAny>>) -> PyResult<()> {
4,656✔
1455
        if let Some(tag_hook) = tag_hook {
4,656✔
1456
            if !tag_hook.is_callable() {
132✔
1457
                return Err(PyErr::new::<PyTypeError, _>(
12✔
1458
                    "tag_hook must be callable or None",
12✔
1459
                ));
12✔
1460
            }
120✔
1461

1462
            self.tag_hook = Some(tag_hook.clone().unbind());
120✔
1463
        } else {
4,524✔
1464
            self.tag_hook = None;
4,524✔
1465
        }
4,524✔
1466
        Ok(())
4,644✔
1467
    }
4,656✔
1468

1469
    #[getter]
1470
    fn object_hook(&self, py: Python<'_>) -> Option<Py<PyAny>> {
12✔
1471
        self.object_hook
12✔
1472
            .as_ref()
12✔
1473
            .map(|object_hook| object_hook.clone_ref(py))
12✔
1474
    }
12✔
1475

1476
    #[setter]
1477
    fn set_object_hook(&mut self, object_hook: Option<&Bound<'_, PyAny>>) -> PyResult<()> {
4,644✔
1478
        if let Some(object_hook) = object_hook {
4,644✔
1479
            if !object_hook.is_callable() {
48✔
1480
                return Err(PyErr::new::<PyTypeError, _>(
12✔
1481
                    "object_hook must be callable or None",
12✔
1482
                ));
12✔
1483
            }
36✔
1484

1485
            self.object_hook = Some(object_hook.clone().unbind());
36✔
1486
        } else {
4,596✔
1487
            self.object_hook = None;
4,596✔
1488
        }
4,596✔
1489
        Ok(())
4,632✔
1490
    }
4,644✔
1491

1492
    #[getter]
1493
    fn str_errors(&self, py: Python<'_>) -> Py<PyString> {
60✔
1494
        if let Some(str_errors) = self.str_errors.as_ref() {
60✔
1495
            str_errors.clone_ref(py)
48✔
1496
        } else {
1497
            intern!(py, "strict").clone().unbind()
12✔
1498
        }
1499
    }
60✔
1500

1501
    #[setter]
1502
    fn set_str_errors(&mut self, str_errors: &Bound<'_, PyString>) -> PyResult<()> {
4,632✔
1503
        let as_string: &str = str_errors.extract()?;
4,632✔
1504
        self.str_errors = match as_string {
4,632✔
1505
            "strict" => None,
4,632✔
1506
            "ignore" | "replace" | "backslashreplace" | "surrogateescape" => {
108✔
1507
                Some(str_errors.clone().unbind())
96✔
1508
            }
1509
            _ => {
1510
                return Err(PyValueError::new_err(format!(
12✔
1511
                    "invalid str_errors value: '{str_errors}'"
12✔
1512
                )));
12✔
1513
            }
1514
        };
1515
        Ok(())
4,620✔
1516
    }
4,632✔
1517

1518
    /// Read bytes from the data stream.
1519
    ///
1520
    /// :param amount: the number of bytes to read
1521
    #[pyo3(signature = (amount, /))]
1522
    fn read(&mut self, py: Python<'_>, amount: usize) -> PyResult<Vec<u8>> {
10,390,316✔
1523
        if amount == 0 {
10,390,316✔
1524
            return Ok(Vec::default());
224✔
1525
        }
10,390,092✔
1526

1527
        if self.available_bytes == 0 {
10,390,092✔
1528
            // No buffer
1529
            let (new_bytes, amount_read) = self.read_from_fp(py, amount)?;
72✔
1530
            self.read_position = amount;
12✔
1531
            self.available_bytes = amount_read - amount;
12✔
1532
            let new_buffer = new_bytes.as_bytes()[..amount].to_vec();
12✔
1533
            self.buffer = Some(new_bytes.unbind());
12✔
1534
            Ok(new_buffer)
12✔
1535
        } else if self.available_bytes < amount {
10,390,020✔
1536
            // Combine the remnants of the partial buffer with new data read from the file
1537
            let needed_bytes = amount - self.available_bytes;
96✔
1538
            let mut concatenated_buffer: Vec<u8> =
96✔
1539
                self.buffer.take().unwrap().as_bytes(py)[self.read_position..].to_vec();
96✔
1540
            let (new_bytes, amount_read) = self.read_from_fp(py, needed_bytes)?;
96✔
1541
            concatenated_buffer.extend_from_slice(&new_bytes[..needed_bytes]);
24✔
1542
            self.buffer = Some(new_bytes.unbind());
24✔
1543
            self.available_bytes = amount_read - needed_bytes;
24✔
1544
            self.read_position = needed_bytes;
24✔
1545
            Ok(concatenated_buffer)
24✔
1546
        } else {
1547
            // Return a slice from the existing bytes object
1548
            let vec = self.buffer.as_ref().unwrap().as_bytes(py)
10,389,924✔
1549
                [self.read_position..self.read_position + amount]
10,389,924✔
1550
                .to_vec();
10,389,924✔
1551
            self.available_bytes -= amount;
10,389,924✔
1552
            self.read_position += amount;
10,389,924✔
1553
            Ok(vec)
10,389,924✔
1554
        }
1555
    }
10,390,316✔
1556

1557
    /// Decode the next value from the stream.
1558
    ///
1559
    /// :param immutable: if :data:`True`, decode the next item as an immutable type
1560
    ///     (e.g. :class:`tuple` instead of a :class:`list`), if possible
1561
    /// :return: the decoded object
1562
    /// :raises CBORDecodeError: if there is any problem decoding the stream
1563
    #[pyo3(signature = (*, immutable = false))]
1564
    pub fn decode<'py>(&mut self, py: Python<'py>, immutable: bool) -> PyResult<Bound<'py, PyAny>> {
4,500✔
1565
        let mut frames: Vec<StackFrame> = Vec::new();
4,500✔
1566

1567
        fn add_frame<'a>(
11,524✔
1568
            frames: &mut Vec<StackFrame<'a>>,
11,524✔
1569
            max_depth: usize,
11,524✔
1570
            frame: StackFrame<'a>,
11,524✔
1571
        ) -> PyResult<()> {
11,524✔
1572
            if frames.len() == max_depth {
11,524✔
1573
                return Err(CBORDecodeError::new_err(format!(
24✔
1574
                    "maximum container nesting depth ({max_depth}) exceeded",
24✔
1575
                )));
24✔
1576
            }
11,500✔
1577

1578
            frames.push(frame);
11,500✔
1579
            Ok(())
11,500✔
1580
        }
11,524✔
1581

1582
        fn wrap_exception(py: Python<'_>, err: PyErr, typename: &DisplayName) -> PyErr {
660✔
1583
            if err.is_instance_of::<CBORDecodeEOF>(py) {
660✔
1584
                err
120✔
1585
            } else if err.is_instance_of::<CBORDecodeError>(py) {
540✔
1586
                CBORDecodeError::new_err(format!(
300✔
1587
                    "error decoding {}: {}",
1588
                    typename,
1589
                    err.arguments(py)
300✔
1590
                ))
1591
            } else {
1592
                create_exc_from(
240✔
1593
                    py,
240✔
1594
                    CBORDecodeError::new_err(format!("error decoding {}", typename)),
240✔
1595
                    Some(err),
240✔
1596
                )
1597
            }
1598
        }
660✔
1599

1600
        let mut shareables: Vec<Option<Bound<'py, PyAny>>> = Vec::new();
4,500✔
1601
        let mut string_namespaces: Vec<Vec<Bound<'py, PyAny>>> = Vec::new();
4,500✔
1602
        let mut value: Option<Bound<'py, PyAny>> = None;
4,500✔
1603
        let mut current_immutable: bool = immutable;
4,500✔
1604
        loop {
1605
            let result: PyResult<DecoderResult<'py>> = if let Some(previous_value) = value.take() {
1,618,096✔
1606
                // Call the decoder callback of the last frame
1607
                let frame = frames.last_mut().unwrap();
804,092✔
1608
                if let Some(decoder_callback) = frame.decoder_callback.as_mut() {
804,092✔
1609
                    decoder_callback(previous_value, frame.immutable)
803,984✔
1610
                        .map_err(|e| wrap_exception(py, e, &frame.typename))
803,984✔
1611
                } else if frame.contains_string_namespace {
108✔
1612
                    string_namespaces
36✔
1613
                        .pop()
36✔
1614
                        .expect("no string namespaces to pop from");
36✔
1615
                    Ok(CompleteFrame(previous_value))
36✔
1616
                } else if let Some(shareable_index) = frame.shareable_index {
72✔
1617
                    shareables[shareable_index].get_or_insert_with(|| previous_value.clone());
72✔
1618
                    Ok(CompleteFrame(previous_value))
72✔
1619
                } else {
UNCOV
1620
                    panic!("no decoder callback, shareable index or string namespace");
×
1621
                }
1622
            } else {
1623
                let (major_type, subtype) = self.read_major_and_subtype(py)?;
814,004✔
1624
                match major_type {
813,980✔
1625
                    0 => self.decode_uint(py, subtype),
3,252✔
1626
                    1 => self.decode_negint(py, subtype),
380✔
1627
                    2 => self.decode_bytestring(py, subtype),
1,248✔
1628
                    3 => self.decode_string(py, subtype),
788,984✔
1629
                    4 => self.decode_array(py, subtype, current_immutable),
7,148✔
1630
                    5 => self.decode_map(py, subtype, current_immutable),
1,884✔
1631
                    6 => self.decode_semantic(py, subtype, current_immutable),
3,100✔
1632
                    7 => self.decode_special(py, subtype),
7,984✔
UNCOV
1633
                    _ => Err(CBORDecodeError::new_err(format!(
×
UNCOV
1634
                        "invalid major type: {major_type}"
×
UNCOV
1635
                    ))),
×
1636
                }
1637
                .map_err(|e| {
813,980✔
1638
                    let typename = match major_type {
360✔
1639
                        0 => "unsigned integer",
12✔
UNCOV
1640
                        1 => "negative integer",
×
1641
                        2 => "byte string",
108✔
1642
                        3 => "text string",
168✔
UNCOV
1643
                        4 => "array",
×
1644
                        5 => "map",
×
UNCOV
1645
                        6 => "semantic tag",
×
1646
                        7 => "special value",
72✔
1647
                        _ => unreachable!("invalid major types should have been handled earlier"),
×
1648
                    };
1649
                    wrap_exception(py, e, &DisplayName::String(typename))
360✔
1650
                })
360✔
1651
            };
1652

1653
            match result {
1,617,412✔
1654
                Ok(BeginFrame(callback, requested_immutable, container, typename)) => {
11,224✔
1655
                    if let Some(frame) = frames.last_mut()
11,224✔
1656
                        && let Some(container) = container
8,608✔
1657
                        && let Some(shareable_index) = frame.shareable_index
6,064✔
1658
                    {
156✔
1659
                        frames.pop();
156✔
1660
                        shareables[shareable_index] = Some(container.clone());
156✔
1661
                    }
11,068✔
1662
                    current_immutable = current_immutable || requested_immutable;
11,224✔
1663
                    add_frame(
11,224✔
1664
                        &mut frames,
11,224✔
1665
                        self.max_depth,
11,224✔
1666
                        StackFrame {
11,224✔
1667
                            immutable: current_immutable,
11,224✔
1668
                            decoder_callback: Some(callback),
11,224✔
1669
                            shareable_index: None,
11,224✔
1670
                            typename,
11,224✔
1671
                            contains_string_namespace: false,
11,224✔
1672
                        },
11,224✔
1673
                    )?;
24✔
1674
                }
1675
                Ok(ContinueFrame(require_immutable)) => {
798,004✔
1676
                    // If require_immutable is true, the next value must be immutable
1677
                    // Otherwise, restore the immutable flag to the previous value
1678
                    current_immutable = if frames.len() >= 2 {
798,004✔
1679
                        frames.get(frames.len() - 2).unwrap().immutable
789,136✔
1680
                    } else {
1681
                        immutable
8,868✔
1682
                    } || require_immutable;
796,592✔
1683
                    frames.last_mut().unwrap().immutable = current_immutable;
798,004✔
1684
                }
1685
                Ok(CompleteFrame(new_value)) => {
5,488✔
1686
                    frames
5,488✔
1687
                        .pop()
5,488✔
1688
                        .expect("received frame completion but there are no frames on the stack");
5,488✔
1689
                    current_immutable = frames.last().map_or(immutable, |frame| frame.immutable);
5,488✔
1690
                    value = Some(new_value);
5,488✔
1691
                }
1692
                Ok(Value(new_value)) => {
12,200✔
1693
                    value = Some(new_value);
12,200✔
1694
                }
12,200✔
1695
                Ok(StringNamespace) => {
1696
                    add_frame(
60✔
1697
                        &mut frames,
60✔
1698
                        self.max_depth,
60✔
1699
                        StackFrame {
60✔
1700
                            immutable: current_immutable,
60✔
1701
                            decoder_callback: None,
60✔
1702
                            shareable_index: None,
60✔
1703
                            typename: DisplayName::String("string namespace"),
60✔
1704
                            contains_string_namespace: true,
60✔
1705
                        },
60✔
UNCOV
1706
                    )?;
×
1707
                    string_namespaces.push(Vec::new());
60✔
1708
                }
1709
                Ok(StringValue(string, length)) => {
789,896✔
1710
                    // Conditionally add the string to the innermost string namespace
1711
                    if let Some(namespace) = string_namespaces.last_mut()
789,896✔
1712
                        && match namespace.len() {
786,516✔
1713
                            0..24 => length >= 3,
786,516✔
1714
                            24..256 => length >= 4,
786,168✔
1715
                            256..65536 => length >= 5,
783,384✔
1716
                            65536..=4294967295 => length >= 7,
24✔
UNCOV
1717
                            _ => length >= 11,
×
1718
                        }
1719
                    {
786,504✔
1720
                        namespace.push(string.clone());
786,504✔
1721
                    }
786,504✔
1722
                    value = Some(string);
789,896✔
1723
                }
1724
                Ok(StringReference(index)) => {
120✔
1725
                    frames
120✔
1726
                        .pop()
120✔
1727
                        .expect("  received string reference but there are no frames on the stack");
120✔
1728
                    if let Some(namespace) = string_namespaces.last() {
120✔
1729
                        if let Some(string) = namespace.get(index) {
108✔
1730
                            value = Some(string.clone());
96✔
1731
                        } else {
96✔
1732
                            return Err(CBORDecodeError::new_err(format!(
12✔
1733
                                "string reference {index} not found"
12✔
1734
                            )));
12✔
1735
                        }
1736
                    } else {
1737
                        return Err(CBORDecodeError::new_err(
12✔
1738
                            "string reference outside of namespace",
12✔
1739
                        ));
12✔
1740
                    }
1741
                    current_immutable = frames
96✔
1742
                        .last()
96✔
1743
                        .map_or(current_immutable, |frame| frame.immutable);
96✔
1744
                }
1745
                Ok(Shareable) => {
1746
                    add_frame(
240✔
1747
                        &mut frames,
240✔
1748
                        self.max_depth,
240✔
1749
                        StackFrame {
240✔
1750
                            immutable: current_immutable,
240✔
1751
                            decoder_callback: None,
240✔
1752
                            shareable_index: Some(shareables.len()),
240✔
1753
                            typename: DisplayName::String("shareable value"),
240✔
1754
                            contains_string_namespace: false,
240✔
1755
                        },
240✔
UNCOV
1756
                    )?;
×
1757
                    shareables.push(None);
240✔
1758
                }
1759
                Ok(SharedReference(index)) => {
180✔
1760
                    frames
180✔
1761
                        .pop()
180✔
1762
                        .expect("received shared reference but there are no frames on the stack");
180✔
1763
                    value = match shareables.get(index) {
180✔
1764
                        Some(Some(value)) => Some(value.clone()),
144✔
1765
                        Some(None) => {
1766
                            return Err(CBORDecodeError::new_err(format!(
12✔
1767
                                "shared value {index} has not been initialized"
12✔
1768
                            )));
12✔
1769
                        }
1770
                        None => {
1771
                            return Err(CBORDecodeError::new_err(format!(
24✔
1772
                                "shared reference {index} not found"
24✔
1773
                            )));
24✔
1774
                        }
1775
                    };
1776
                    current_immutable = frames
144✔
1777
                        .last()
144✔
1778
                        .map_or(current_immutable, |frame| frame.immutable);
144✔
1779
                }
1780
                Err(err) => {
660✔
1781
                    // If an Exception was raised, wrap it in a CBORDecodeError
1782
                    // If a ValueError was raised, wrap it in a CBORDecodeError
1783
                    return if err.is_instance_of::<CBORDecodeError>(py) {
660✔
1784
                        Err(err)
660✔
UNCOV
1785
                    } else if err.is_instance_of::<PyValueError>(py)
×
UNCOV
1786
                        || err.is_instance_of::<PyException>(py)
×
1787
                    {
UNCOV
1788
                        Err(create_exc_from(
×
1789
                            py,
×
1790
                            CBORDecodeError::new_err(err.to_string()),
×
UNCOV
1791
                            Some(err),
×
1792
                        ))
×
1793
                    } else {
1794
                        Err(err)
×
1795
                    };
1796
                }
1797
            }
1798

1799
            if frames.is_empty() {
1,617,328✔
1800
                // If fp was seekable and excess data has been read, empty the buffer and
1801
                // rewind the file
1802
                if self.available_bytes > 0
3,732✔
1803
                    && let Some(fp) = &self.fp
24✔
1804
                {
1805
                    let offset = -(self.available_bytes as isize);
24✔
1806
                    fp.call_method1(py, intern!(py, "seek"), (offset, SEEK_CUR))?;
24✔
1807
                    self.buffer = None;
24✔
1808
                    self.available_bytes = 0;
24✔
1809
                    self.read_position = 0;
24✔
1810
                }
3,708✔
1811
                return Ok(value.expect("stack is empty but final return value is missing"));
3,732✔
1812
            }
1,613,596✔
1813
        }
1814
    }
4,500✔
1815
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc