• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

agronholm / cbor2 / 26990345403

05 Jun 2026 01:45AM UTC coverage: 94.679% (-0.06%) from 94.743%
26990345403

Pull #311

github

web-flow
Merge 212f8f2f7 into ed869d568
Pull Request #311: Add array_hook to surface indefinite-length arrays on decode

69 of 74 new or added lines in 2 files covered. (93.24%)

1 existing line in 1 file now uncovered.

2402 of 2537 relevant lines covered (94.68%)

1555.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.69
/rust/decoder.rs
1
use crate::_cbor2::{BREAK_MARKER, SYS_MAXSIZE, UNDEFINED};
2
use crate::decoder::DecoderResult::{
3
    BeginFrame, CompleteFrame, ContinueFrame, Shareable, SharedReference, StringNamespace,
4
    StringReference, StringValue, Value,
5
};
6
#[cfg(not(Py_3_15))]
7
use crate::types::FrozenDict;
8
use crate::types::{
9
    CBORDecodeEOF, CBORDecodeError, CBORSimpleValue, CBORTag, DECIMAL_TYPE, FRACTION_TYPE,
10
    IPV4ADDRESS_TYPE, IPV4INTERFACE_TYPE, IPV4NETWORK_TYPE, IPV6ADDRESS_TYPE, IPV6INTERFACE_TYPE,
11
    IPV6NETWORK_TYPE, UUID_TYPE,
12
};
13
use crate::utils::{PyImportable, create_exc_from, raise_exc_from};
14
use half::f16;
15
use pyo3::exceptions::{PyException, PyLookupError, PyTypeError, PyValueError};
16
use pyo3::prelude::*;
17
use pyo3::sync::PyOnceLock;
18
use pyo3::types::{
19
    PyBytes, PyCFunction, PyComplex, PyDict, PyFrozenSet, PyInt, PyList, PyListMethods, PyMapping,
20
    PySet, PyString, PyTuple,
21
};
22
use pyo3::{IntoPyObjectExt, Py, PyAny, PyErrArguments, intern, pyclass};
23
use std::fmt::{Display, Formatter};
24
use std::mem::{replace, take};
25

26
const IMMUTABLE_ATTR: &str = "_cbor2_immutable";
27
const NAME_ATTR: &str = "_cbor2_name";
28
const SEEK_CUR: u8 = 1;
29

30
static DATE_FROMISOFORMAT: PyImportable = PyImportable::new("datetime", "date.fromisoformat");
31
static DATE_FROMORDINAL: PyImportable = PyImportable::new("datetime", "date.fromordinal");
32
static DATETIME_FROMISOFORMAT: PyImportable =
33
    PyImportable::new("datetime", "datetime.fromisoformat");
34
static DATETIME_FROMTIMESTAMP: PyImportable =
35
    PyImportable::new("datetime", "datetime.fromtimestamp");
36
static EMAIL_PARSER: PyImportable = PyImportable::new("email.parser", "Parser");
37
static INCREMENTAL_UTF8_DECODER: PyOnceLock<Py<PyAny>> = PyOnceLock::new();
38
static INT_FROMBYTES: PyImportable = PyImportable::new("builtins", "int.from_bytes");
39
static IPADDRESS_FUNC: PyImportable = PyImportable::new("ipaddress", "ip_address");
40
static IPNETWORK_FUNC: PyImportable = PyImportable::new("ipaddress", "ip_network");
41
static IPINTERFACE_FUNC: PyImportable = PyImportable::new("ipaddress", "ip_interface");
42
static RE_COMPILE: PyImportable = PyImportable::new("re", "compile");
43
static UTC: PyImportable = PyImportable::new("datetime", "timezone.utc");
44
#[cfg(Py_3_15)]
45
static FROZEN_DICT: PyImportable = PyImportable::new("builtins", "frozendict");
46

47
enum DecoderResult<'a> {
48
    BeginFrame(
49
        Box<DecoderCallback<'a>>,
50
        bool,
51
        Option<Bound<'a, PyAny>>,
52
        DisplayName<'a>,
53
    ),
54
    ContinueFrame(bool),
55
    CompleteFrame(Bound<'a, PyAny>),
56
    Value(Bound<'a, PyAny>),
57
    StringValue(Bound<'a, PyAny>, usize),
58
    StringNamespace,
59
    StringReference(usize),
60
    Shareable,
61
    SharedReference(usize),
62
}
63

64
enum DisplayName<'a> {
65
    String(&'static str),
66
    SemanticTag(u64),
67
    PythonName(Bound<'a, PyAny>),
68
}
69

70
impl<'a> Display for DisplayName<'a> {
71
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
528✔
72
        match self {
528✔
73
            DisplayName::String(s) => f.write_str(s),
504✔
74
            DisplayName::SemanticTag(tagnum) => write!(f, "semantic tag {}", tagnum),
12✔
75
            DisplayName::PythonName(obj) => write!(f, "{}", obj),
12✔
76
        }
77
    }
528✔
78
}
79

80
type DecoderCallback<'py> =
81
    dyn 'py + FnMut(Bound<'py, PyAny>, bool) -> PyResult<DecoderResult<'py>>;
82

83
struct StackFrame<'py> {
84
    immutable: bool,
85
    decoder_callback: Option<Box<DecoderCallback<'py>>>,
86
    shareable_index: Option<usize>,
87
    typename: DisplayName<'py>,
88
    contains_string_namespace: bool,
89
}
90

91
/// Decorates a function to be a two-stage decoder.
92
///
93
/// :param name: the name displayed in a :exc:`CBORDecodeError` raised by the decoder
94
///     (e.g. "error decoding thingamajig") where name='thingamajig`)
95
/// :param immutable: :data:`True` if the item sent to the decoder should be decoded as immutable
96
#[pyfunction]
97
#[pyo3(signature = (func=None, /, *, name=None, immutable=false))]
98
pub fn shareable_decoder<'py>(
120✔
99
    py: Python<'py>,
120✔
100
    func: Option<Py<PyAny>>,
120✔
101
    name: Option<Py<PyString>>,
120✔
102
    immutable: bool,
120✔
103
) -> PyResult<Bound<'py, PyAny>> {
120✔
104
    match func {
120✔
105
        None => PyCFunction::new_closure(
60✔
106
            py,
60✔
107
            None,
60✔
108
            None,
60✔
109
            move |args: &Bound<'_, PyTuple>,
110
                  _kwargs: Option<&Bound<'_, PyDict>>|
111
                  -> PyResult<Py<PyAny>> {
60✔
112
                let py = args.py();
60✔
113
                let func = args.get_item(0)?;
60✔
114
                let name = name.as_ref().map(|x| x.clone_ref(py));
60✔
115
                shareable_decoder(py, Some(func.unbind()), name, immutable).map(Bound::unbind)
60✔
116
            },
60✔
117
        )
118
        .map(|f| f.into_any()),
60✔
119
        Some(func) => {
60✔
120
            let bound_func = func.bind(py);
60✔
121
            if !bound_func.is_callable() {
60✔
122
                return Err(PyTypeError::new_err(format!("{func} is not callable")));
×
123
            }
60✔
124
            bound_func.setattr(intern!(py, NAME_ATTR), name)?;
60✔
125
            bound_func.setattr(intern!(py, IMMUTABLE_ATTR), immutable)?;
60✔
126
            Ok(bound_func.clone().into_any())
60✔
127
        }
128
    }
129
}
120✔
130

131
fn require_tuple<'py>(value: Bound<'py, PyAny>, length: usize) -> PyResult<Bound<'py, PyTuple>> {
488✔
132
    let array: Bound<'py, PyTuple> = value
488✔
133
        .cast_into()
488✔
134
        .map_err(|_| PyTypeError::new_err("input value must be an array"))?;
488✔
135
    if array.len() != length {
452✔
136
        return Err(PyValueError::new_err(format!(
×
137
            "expected an array with exactly {length} elements"
×
138
        )));
×
139
    }
452✔
140
    Ok(array)
452✔
141
}
488✔
142

143
/// The CBORDecoder class implements a fully featured `CBOR`_ decoder with
144
/// several extensions for handling shared references, big integers, rational
145
/// numbers and so on. Typically, the class is not used directly, but the
146
/// :func:`load` and :func:`loads` functions are called to indirectly construct
147
/// and use the class.
148
///
149
/// When the class is constructed manually, the main entry point is :meth:`decode`.
150
///
151
/// :param fp: the file to read from (any file-like object opened for reading in binary mode)
152
/// :param tag_hook:
153
///     callable that takes 2 arguments: the decoder instance, and the :class:`.CBORTag`
154
///     to be decoded. This callback is invoked for any tags for which there is no
155
///     built-in decoder. The return value is substituted for the :class:`.CBORTag`
156
///     object in the deserialized output
157
/// :param object_hook:
158
///     callable that takes 2 arguments: the decoder instance, and a dictionary. This
159
///     callback is invoked for each deserialized :class:`dict` object. The return value
160
///     is substituted for the dict in the deserialized output.
161
/// :param array_hook:
162
///     callable that takes 2 arguments: the decoded array (a :class:`list`, or a
163
///     :class:`tuple` when ``immutable`` is true), and a boolean that is :data:`True` if
164
///     the array used indefinite-length encoding. This callback is invoked for each
165
///     deserialized array, and its return value is substituted for the array in the
166
///     deserialized output. It allows callers to preserve the distinction between
167
///     definite- and indefinite-length arrays, which is otherwise lost after decoding.
168
/// :param semantic_decoders:
169
///     An optional mapping for overriding the decoding for select semantic tags.
170
///     The value is a mapping of semantic tags (integers) to callables that take
171
///     the decoder instance as the sole argument.
172
/// :param str_errors:
173
///     determines how to handle Unicode decoding errors (see the `Error Handlers`_
174
///     section in the standard library documentation for details)
175
/// :param read_size: minimum number of bytes to read at once
176
///     (ignored if ``fp`` is not seekable)
177
/// :param max_depth:
178
///     maximum allowed depth for nested containers
179
/// :param allow_indefinite:
180
///     if :data:`False`, raise a :exc:`CBORDecodeError` when encountering an indefinite-length
181
///     string or container in the input stream
182
/// :param allow_duplicate_keys:
183
///     if :data:`False`, raise a :exc:`CBORDecodeError` when a map key that has already been
184
///     decoded in the same map is encountered
185
///
186
/// .. _CBOR: https://cbor.io/
187
#[pyclass(module = "cbor2")]
188
pub struct CBORDecoder {
189
    fp: Option<Py<PyAny>>,
190
    tag_hook: Option<Py<PyAny>>,
191
    object_hook: Option<Py<PyAny>>,
192
    array_hook: Option<Py<PyAny>>,
193
    semantic_decoders: Option<Py<PyMapping>>,
194
    str_errors: Option<Py<PyString>>,
195
    #[pyo3(get)]
196
    read_size: usize,
197
    #[pyo3(get)]
198
    max_depth: usize,
199
    #[pyo3(get)]
200
    allow_indefinite: bool,
201
    #[pyo3(get)]
202
    allow_duplicate_keys: bool,
203

204
    read_method: Option<Py<PyAny>>,
205
    buffer: Option<Py<PyBytes>>,
206
    read_position: usize,
207
    available_bytes: usize,
208
    fp_is_seekable: bool,
209
}
210

211
impl CBORDecoder {
212
    pub fn new_internal(
4,740✔
213
        py: Python<'_>,
4,740✔
214
        fp: Option<&Bound<'_, PyAny>>,
4,740✔
215
        buffer: Option<Bound<PyBytes>>,
4,740✔
216
        tag_hook: Option<&Bound<'_, PyAny>>,
4,740✔
217
        object_hook: Option<&Bound<'_, PyAny>>,
4,740✔
218
        array_hook: Option<&Bound<'_, PyAny>>,
4,740✔
219
        semantic_decoders: Option<&Bound<'_, PyMapping>>,
4,740✔
220
        str_errors: &str,
4,740✔
221
        read_size: usize,
4,740✔
222
        max_depth: usize,
4,740✔
223
        allow_indefinite: bool,
4,740✔
224
        allow_duplicate_keys: bool,
4,740✔
225
    ) -> PyResult<Self> {
4,740✔
226
        let available_bytes = if let Some(buffer) = buffer.as_ref() {
4,740✔
227
            buffer.len()?
4,260✔
228
        } else {
229
            0
480✔
230
        };
231
        let bound_str_errors = PyString::new(py, str_errors);
4,740✔
232
        let mut this = Self {
4,740✔
233
            fp: None,
4,740✔
234
            tag_hook: None,
4,740✔
235
            object_hook: None,
4,740✔
236
            array_hook: None,
4,740✔
237
            str_errors: None,
4,740✔
238
            read_size,
4,740✔
239
            max_depth,
4,740✔
240
            allow_indefinite,
4,740✔
241
            allow_duplicate_keys,
4,740✔
242
            semantic_decoders: semantic_decoders.map(|d| d.clone().unbind()),
4,740✔
243
            read_method: None,
4,740✔
244
            buffer: buffer.map(Bound::unbind),
4,740✔
245
            read_position: 0,
246
            available_bytes,
4,740✔
247
            fp_is_seekable: false,
248
        };
249
        if let Some(fp) = fp {
4,740✔
250
            this.set_fp(fp)?
480✔
251
        };
4,260✔
252
        this.set_tag_hook(tag_hook)?;
4,716✔
253
        this.set_object_hook(object_hook)?;
4,704✔
254
        this.set_array_hook(array_hook)?;
4,692✔
255
        this.set_str_errors(&bound_str_errors)?;
4,680✔
256
        Ok(this)
4,668✔
257
    }
4,740✔
258

259
    fn read_from_fp<'py>(
468✔
260
        &mut self,
468✔
261
        py: Python<'py>,
468✔
262
        minimum_amount: usize,
468✔
263
    ) -> PyResult<(Bound<'py, PyBytes>, usize)> {
468✔
264
        let read_size: usize = if self.fp_is_seekable {
468✔
265
            self.read_size
336✔
266
        } else {
267
            1
132✔
268
        };
269
        let bytes_to_read = minimum_amount.max(read_size);
468✔
270
        let num_read_bytes = if let Some(read) = self.read_method.as_ref() {
468✔
271
            let bytes_from_fp: Bound<PyBytes> =
360✔
272
                read.bind(py).call1((&bytes_to_read,))?.cast_into()?;
360✔
273
            let num_read_bytes = bytes_from_fp.len()?;
360✔
274
            if num_read_bytes >= minimum_amount {
360✔
275
                return Ok((bytes_from_fp, num_read_bytes));
312✔
276
            }
48✔
277
            num_read_bytes
48✔
278
        } else {
279
            0
108✔
280
        };
281
        Err(CBORDecodeEOF::new_err(format!(
156✔
282
            "premature end of stream (expected to read at least {minimum_amount} \
156✔
283
                 bytes, got {num_read_bytes} instead)"
156✔
284
        )))
156✔
285
    }
468✔
286

287
    /// Reads exactly `N` bytes from the input and returns them as a fixed-size array.
    ///
    /// Bytes are served from the current buffer when possible; otherwise more data is
    /// fetched via `read_from_fp`. On failure (e.g. `CBORDecodeEOF`) the buffering state
    /// is left untouched, so the decoder stays internally consistent.
    fn read_exact<const N: usize>(&mut self, py: Python<'_>) -> PyResult<[u8; N]> {
        if self.available_bytes == 0 {
            // Nothing buffered: the freshly read bytes object becomes the new buffer.
            let (new_bytes, amount_read) = self.read_from_fp(py, N)?;
            self.read_position = N;
            self.available_bytes = amount_read - N;
            self.buffer = Some(new_bytes.unbind());
            Ok(self.buffer.as_ref().unwrap().as_bytes(py)[..N].try_into()?)
        } else if self.available_bytes < N {
            // Combine the remnants of the partial buffer with new data read from the file.
            let needed_bytes = N - self.available_bytes;
            let mut concatenated_buffer: Vec<u8> = Vec::with_capacity(N);
            {
                // Copy only the unread tail, and leave `self.buffer` in place: if the
                // read below fails, the buffer and its counters must still agree.
                let current = self.buffer.as_ref().unwrap().as_bytes(py);
                let unread_end = self.read_position + self.available_bytes;
                concatenated_buffer.extend_from_slice(&current[self.read_position..unread_end]);
            }
            let (new_bytes, amount_read) = self.read_from_fp(py, needed_bytes)?;
            concatenated_buffer.extend_from_slice(&new_bytes[..needed_bytes]);
            self.buffer = Some(new_bytes.unbind());
            self.available_bytes = amount_read - needed_bytes;
            self.read_position = needed_bytes;
            Ok(concatenated_buffer
                .try_into()
                .expect("buffer size mismatch"))
        } else {
            // Return a slice from the existing bytes object
            let slice: [u8; N] = self.buffer.as_ref().unwrap().bind(py).as_bytes()
                [self.read_position..self.read_position + N]
                .try_into()?;
            self.available_bytes -= N;
            self.read_position += N;
            Ok(slice)
        }
    }
38,640✔
321

322
    /// Reads one initial byte and splits it into `(major type, subtype)`: the top three
    /// bits and the low five bits respectively.
    fn read_major_and_subtype(&mut self, py: Python<'_>) -> PyResult<(u8, u8)> {
        let [initial_byte] = self.read_exact::<1>(py)?;
        Ok((initial_byte >> 5, initial_byte & 0b0001_1111))
    }
28,040✔
328

329
    fn decode_length_finite(&mut self, py: Python<'_>, subtype: u8) -> PyResult<u64> {
7,092✔
330
        match self.decode_length(py, subtype)? {
7,092✔
331
            Some(length) => Ok(length),
7,056✔
332
            None => Err(CBORDecodeError::new_err(
24✔
333
                "indefinite length not allowed here",
24✔
334
            )),
24✔
335
        }
336
    }
7,092✔
337

338
    /// Like [`decode_length`], but converts `Some(u64)` to `Some(usize)`, returning
339
    /// a [`CBORDecodeError`] if the value exceeds the platform's address space.
340
    fn decode_length_as_usize(&mut self, py: Python<'_>, subtype: u8) -> PyResult<Option<usize>> {
12,820✔
341
        match self.decode_length(py, subtype)? {
12,820✔
342
            Some(length) => usize::try_from(length).map(Some).map_err(|_| {
12,412✔
343
                CBORDecodeError::new_err(format!(
×
344
                    "huge item length {length} exceeds the system address space"
345
                ))
346
            }),
×
347
            None => Ok(None),
396✔
348
        }
349
    }
12,820✔
350

351
    //
352
    // Decoders for major tags (0-7)
353
    //
354

355
    /// Decode the length of the next item.
356
    ///
357
    /// This is a low-level operation that may be needed by custom decoder callbacks.
358
    ///
359
    /// :param subtype:
360
    /// :return: the length of the item, or :data:`None` to indicate an indefinite-length item
361
    fn decode_length(&mut self, py: Python<'_>, subtype: u8) -> PyResult<Option<u64>> {
19,912✔
362
        let length = match subtype {
19,912✔
363
            ..24 => Some(subtype as u64),
19,912✔
364
            24 => Some(self.read_exact::<1>(py)?[0] as u64),
2,088✔
365
            25 => Some(u16::from_be_bytes(self.read_exact(py)?) as u64),
1,120✔
366
            26 => Some(u32::from_be_bytes(self.read_exact(py)?) as u64),
300✔
367
            27 => Some(u64::from_be_bytes(self.read_exact(py)?)),
296✔
368
            31 => {
369
                if !self.allow_indefinite {
432✔
370
                    return Err(CBORDecodeError::new_err(
12✔
371
                        "encountered indefinite length but it has been disabled",
12✔
372
                    ));
12✔
373
                }
420✔
374
                None
420✔
375
            }
376
            _ => {
377
                return Err(CBORDecodeError::new_err(format!(
12✔
378
                    "unknown unsigned integer subtype 0x{subtype:x}"
12✔
379
                )));
12✔
380
            }
381
        };
382
        Ok(length)
19,888✔
383
    }
19,912✔
384

385
    fn decode_uint<'py>(&mut self, py: Python<'py>, subtype: u8) -> PyResult<DecoderResult<'py>> {
3,456✔
386
        // Major tag 0
387
        let uint: u64 = self.decode_length_finite(py, subtype)?;
3,456✔
388
        Ok(Value(uint.into_bound_py_any(py)?))
3,444✔
389
    }
3,456✔
390

391
    fn decode_negint<'py>(&mut self, py: Python<'py>, subtype: u8) -> PyResult<DecoderResult<'py>> {
380✔
392
        // Major tag 1
393
        let uint: u64 = self.decode_length_finite(py, subtype)?;
380✔
394
        let signed_int = -(uint as i128) - 1;
380✔
395
        Ok(Value(signed_int.into_bound_py_any(py)?))
380✔
396
    }
380✔
397

398
    fn decode_bytestring<'py>(
1,212✔
399
        &mut self,
1,212✔
400
        py: Python<'py>,
1,212✔
401
        subtype: u8,
1,212✔
402
    ) -> PyResult<DecoderResult<'py>> {
1,212✔
403
        // Major tag 2
404
        match self.decode_length_as_usize(py, subtype)? {
1,212✔
405
            None => {
406
                // Indefinite length
407
                let mut bytes = PyBytes::new(py, b"");
72✔
408
                let sys_maxsize = *SYS_MAXSIZE.get(py).unwrap();
72✔
409
                loop {
410
                    let (major_type, subtype) = self.read_major_and_subtype(py)?;
120✔
411
                    match (major_type, subtype) {
120✔
412
                        (2, _) => {
413
                            let length = self.decode_length_finite(py, subtype)?;
84✔
414
                            if length > sys_maxsize {
72✔
415
                                return Err(CBORDecodeError::new_err(format!(
12✔
416
                                    "chunk too long in an indefinite bytestring chunk: {length}"
12✔
417
                                )));
12✔
418
                            }
60✔
419
                            let length = length as usize;
60✔
420
                            let chunk = self.read(py, length)?;
60✔
421
                            bytes = bytes.add(chunk)?.cast_into()?;
48✔
422
                        }
423
                        (7, 31) => break Ok(Value(bytes.into_any())), // break marker
12✔
424
                        _ => {
425
                            return Err(CBORDecodeError::new_err(format!(
24✔
426
                                "non-byte string (major type {major_type}) found in indefinite \
24✔
427
                                    length byte string"
24✔
428
                            )));
24✔
429
                        }
430
                    }
431
                }
432
            }
433
            Some(length) if length <= 65536 => {
1,140✔
434
                let bytes = self.read(py, length)?;
1,116✔
435
                Ok(StringValue(PyBytes::new(py, &bytes).into_any(), length))
1,080✔
436
            }
437
            Some(length) => {
24✔
438
                // Incrementally read the bytestring, in chunks of 65536 bytes
439
                let mut bytes = PyBytes::new(py, b"");
24✔
440
                let mut remaining_length = length;
24✔
441
                while remaining_length > 0 {
48✔
442
                    let chunk_size = remaining_length.min(65536);
36✔
443
                    let chunk = self.read(py, chunk_size)?;
36✔
444
                    remaining_length -= chunk_size;
24✔
445
                    bytes = bytes.add(chunk)?.cast_into()?;
24✔
446
                }
447
                Ok(StringValue(bytes.into_any(), length))
12✔
448
            }
449
        }
450
    }
1,212✔
451

452
    fn decode_string<'py>(&mut self, py: Python<'py>, subtype: u8) -> PyResult<DecoderResult<'py>> {
2,516✔
453
        // Major tag 3
454
        match self.decode_length_as_usize(py, subtype)? {
2,516✔
455
            None => {
456
                // Indefinite length
457
                let mut string = PyString::new(py, "");
96✔
458
                loop {
459
                    let (major_type, subtype) = self.read_major_and_subtype(py)?;
168✔
460
                    let sys_maxsize = *SYS_MAXSIZE.get(py).unwrap();
168✔
461
                    match (major_type, subtype) {
168✔
462
                        (3, _) => {
463
                            let length = self.decode_length_finite(py, subtype)?;
120✔
464
                            if length > sys_maxsize {
108✔
465
                                return Err(CBORDecodeError::new_err(format!(
12✔
466
                                    "chunk too long in an indefinite text string chunk: {length}"
12✔
467
                                )));
12✔
468
                            }
96✔
469
                            let length = length as usize;
96✔
470
                            let bytes = self.read(py, length)?;
96✔
471
                            let decoded = match self.str_errors.as_ref() {
84✔
472
                                None => PyString::from_bytes(py, bytes.as_slice()),
84✔
473
                                Some(str_errors) => bytes
×
474
                                    .into_bound_py_any(py)?
×
475
                                    .call_method1(
×
476
                                        intern!(py, "decode"),
×
477
                                        (intern!(py, "utf-8"), str_errors),
×
478
                                    )
479
                                    .and_then(|string| string.cast_into().map_err(PyErr::from)),
×
480
                            }?;
12✔
481
                            string = string.add(decoded)?.cast_into()?;
72✔
482
                        }
483
                        (7, 31) => break Ok(Value(string.into_any())), // break marker
24✔
484
                        _ => {
485
                            return Err(CBORDecodeError::new_err(format!(
24✔
486
                                "non-text string (major type {major_type}) found in indefinite \
24✔
487
                                    length text string"
24✔
488
                            )));
24✔
489
                        }
490
                    }
491
                }
492
            }
493
            Some(length) if length <= 65536 => {
2,408✔
494
                let bytes = self.read(py, length)?;
2,348✔
495
                let decoded_string: Bound<'_, PyAny> = match self.str_errors.as_ref() {
2,300✔
496
                    None => PyString::from_bytes(py, bytes.as_slice())?.into_any(),
2,276✔
497
                    Some(str_errors) => bytes.into_bound_py_any(py)?.call_method1(
24✔
498
                        intern!(py, "decode"),
24✔
499
                        (intern!(py, "utf-8"), str_errors.bind(py)),
24✔
500
                    )?,
×
501
                };
502
                Ok(StringValue(decoded_string, length))
2,264✔
503
            }
504
            Some(length) => {
60✔
505
                // Incrementally decode the string, in chunks of 65536 bytes
506
                let decoder_class = INCREMENTAL_UTF8_DECODER
60✔
507
                    .get_or_try_init(py, || -> PyResult<Py<PyAny>> {
60✔
508
                        let decoder = py
12✔
509
                            .import("codecs")?
12✔
510
                            .getattr("lookup")?
12✔
511
                            .call1(("utf-8",))?
12✔
512
                            .getattr("incrementaldecoder")?;
12✔
513
                        Ok(decoder.unbind())
12✔
514
                    })?
12✔
515
                    .bind(py);
60✔
516
                let decoder = match self.str_errors.as_ref() {
60✔
517
                    None => decoder_class.call0()?,
36✔
518
                    Some(str_errors) => decoder_class.call1((str_errors,))?,
24✔
519
                };
520
                let mut string = PyString::new(py, "").into_any();
60✔
521
                let mut remaining_length = length;
60✔
522
                while remaining_length > 0 {
204✔
523
                    let chunk_size = remaining_length.min(65536);
144✔
524
                    let chunk = self.read(py, chunk_size)?;
144✔
525
                    remaining_length -= chunk_size;
144✔
526
                    let is_final_chunk = remaining_length == 0;
144✔
527
                    let decoded_chunk =
144✔
528
                        decoder.call_method1(intern!(py, "decode"), (chunk, is_final_chunk))?;
144✔
529
                    string = string.add(decoded_chunk)?;
144✔
530
                }
531
                Ok(StringValue(string.into_any(), length))
60✔
532
            }
533
        }
534
    }
2,516✔
535

536
    fn decode_array<'py>(
7,208✔
537
        &mut self,
7,208✔
538
        py: Python<'py>,
7,208✔
539
        subtype: u8,
7,208✔
540
        immutable: bool,
7,208✔
541
    ) -> PyResult<DecoderResult<'py>> {
7,208✔
542
        // Major tag 4
543

544
        // Apply the array_hook (if any) to a completed array, passing whether it used
545
        // indefinite-length encoding. This lets callers preserve that distinction, which is
546
        // otherwise lost once an array is decoded into a plain list/tuple.
547
        #[inline]
548
        fn maybe_call_array_hook<'py>(
2,180✔
549
            py: Python<'py>,
2,180✔
550
            value: Bound<'py, PyAny>,
2,180✔
551
            indefinite: bool,
2,180✔
552
            array_hook: Option<&Py<PyAny>>,
2,180✔
553
        ) -> PyResult<Bound<'py, PyAny>> {
2,180✔
554
            if let Some(array_hook) = array_hook {
2,180✔
555
                array_hook.bind(py).call1((value, indefinite))
96✔
556
            } else {
557
                Ok(value)
2,084✔
558
            }
559
        }
2,180✔
560

561
        // `optional_length == None` signals an indefinite-length array; the definite-length
562
        // completion sites below pass `false` and the indefinite (break-marker) sites pass `true`.
563
        let optional_length = self.decode_length_as_usize(py, subtype)?;
7,208✔
564
        let array_hook = self.array_hook.as_ref().map(|hook| hook.clone_ref(py));
7,208✔
565
        if immutable {
7,208✔
566
            let mut items: Vec<Bound<'py, PyAny>> = Vec::new();
1,004✔
567
            let callback: Box<DecoderCallback<'py>> = if let Some(length) = optional_length {
1,004✔
568
                if length == 0 {
968✔
569
                    let value = PyTuple::empty(py).into_any();
56✔
570
                    return Ok(Value(maybe_call_array_hook(
56✔
571
                        py,
56✔
572
                        value,
56✔
573
                        false,
574
                        array_hook.as_ref(),
56✔
NEW
575
                    )?));
×
576
                }
912✔
577

578
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
1,916✔
579
                    items.push(item);
1,916✔
580
                    if items.len() == length {
1,916✔
581
                        let value = PyTuple::new(py, take(&mut items))?.into_any();
864✔
582
                        Ok(CompleteFrame(maybe_call_array_hook(
864✔
583
                            py,
864✔
584
                            value,
864✔
585
                            false,
586
                            array_hook.as_ref(),
864✔
NEW
587
                        )?))
×
588
                    } else {
589
                        Ok(ContinueFrame(false))
1,052✔
590
                    }
591
                })
1,916✔
592
            } else {
593
                let break_marker = BREAK_MARKER.get(py).unwrap().bind(py);
36✔
594
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
120✔
595
                    if item.is(break_marker) {
120✔
596
                        let value = PyTuple::new(py, take(&mut items))?.into_any();
24✔
597
                        Ok(CompleteFrame(maybe_call_array_hook(
24✔
598
                            py,
24✔
599
                            value,
24✔
600
                            true,
601
                            array_hook.as_ref(),
24✔
NEW
602
                        )?))
×
603
                    } else {
604
                        items.push(item);
96✔
605
                        Ok(ContinueFrame(false))
96✔
606
                    }
607
                })
120✔
608
            };
609
            Ok(BeginFrame(
948✔
610
                callback,
948✔
611
                false,
948✔
612
                None,
948✔
613
                DisplayName::String("array"),
948✔
614
            ))
948✔
615
        } else {
616
            let mut list = PyList::empty(py);
6,204✔
617
            let container = list.clone().into_any();
6,204✔
618
            let callback: Box<DecoderCallback<'py>> = if let Some(length) = optional_length {
6,204✔
619
                if length == 0 {
6,072✔
620
                    let value = PyList::empty(py).into_any();
132✔
621
                    return Ok(Value(maybe_call_array_hook(
132✔
622
                        py,
132✔
623
                        value,
132✔
624
                        false,
625
                        array_hook.as_ref(),
132✔
NEW
626
                    )?));
×
627
                }
5,940✔
628

629
                Box::new(move |item, _immutable: bool| {
8,092✔
630
                    list.append(item)?;
8,092✔
631
                    if list.len() == length {
8,092✔
632
                        let value = replace(&mut list, PyList::empty(py)).into_any();
972✔
633
                        Ok(CompleteFrame(maybe_call_array_hook(
972✔
634
                            py,
972✔
635
                            value,
972✔
636
                            false,
637
                            array_hook.as_ref(),
972✔
638
                        )?))
12✔
639
                    } else {
640
                        Ok(ContinueFrame(false))
7,120✔
641
                    }
642
                })
8,092✔
643
            } else {
644
                let break_marker = BREAK_MARKER.get(py).unwrap().bind(py);
132✔
645
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
696✔
646
                    if item.is(break_marker) {
696✔
647
                        let value = replace(&mut list, PyList::empty(py)).into_any();
132✔
648
                        Ok(CompleteFrame(maybe_call_array_hook(
132✔
649
                            py,
132✔
650
                            value,
132✔
651
                            true,
652
                            array_hook.as_ref(),
132✔
NEW
653
                        )?))
×
654
                    } else {
655
                        list.append(item)?;
564✔
656
                        Ok(ContinueFrame(false))
564✔
657
                    }
658
                })
696✔
659
            };
660
            Ok(BeginFrame(
6,072✔
661
                callback,
6,072✔
662
                false,
6,072✔
663
                Some(container),
6,072✔
664
                DisplayName::String("array"),
6,072✔
665
            ))
6,072✔
666
        }
667
    }
7,208✔
668

669
    fn decode_map<'py>(
1,884✔
670
        &mut self,
1,884✔
671
        py: Python<'py>,
1,884✔
672
        subtype: u8,
1,884✔
673
        immutable: bool,
1,884✔
674
    ) -> PyResult<DecoderResult<'py>> {
1,884✔
675
        // Major tag 5
676

677
        #[cfg(Py_3_15)]
678
        fn create_frozen_dict<'py>(
12✔
679
            py: Python<'py>,
12✔
680
            items: Vec<(Bound<'py, PyAny>, Bound<'py, PyAny>)>,
12✔
681
        ) -> PyResult<Bound<'py, PyAny>> {
12✔
682
            FROZEN_DICT
12✔
683
                .get(py)?
12✔
684
                .call1((items,))?
12✔
685
                .cast_into()
12✔
686
                .map_err(|e| PyErr::from(e))
12✔
687
        }
12✔
688
        #[cfg(not(Py_3_15))]
689
        fn create_frozen_dict<'py>(
132✔
690
            py: Python<'py>,
132✔
691
            items: Vec<(Bound<'py, PyAny>, Bound<'py, PyAny>)>,
132✔
692
        ) -> PyResult<Bound<'py, PyAny>> {
132✔
693
            FrozenDict::from_items(py, items).map(|dict| dict.into_any())
132✔
694
        }
132✔
695

696
        #[inline]
697
        fn maybe_call_object_hook<'py>(
1,680✔
698
            py: Python<'py>,
1,680✔
699
            dict: Bound<'py, PyAny>,
1,680✔
700
            object_hook: Option<&Py<PyAny>>,
1,680✔
701
            immutable: bool,
1,680✔
702
        ) -> PyResult<Bound<'py, PyAny>> {
1,680✔
703
            if let Some(object_hook) = object_hook {
1,680✔
704
                object_hook.bind(py).call1((dict, immutable))
24✔
705
            } else {
706
                Ok(dict)
1,656✔
707
            }
708
        }
1,680✔
709

710
        let object_hook = self.object_hook.as_ref().map(|hook| hook.clone_ref(py));
1,884✔
711
        let allow_duplicate_keys = self.allow_duplicate_keys;
1,884✔
712
        let length_or_none = self.decode_length_as_usize(py, subtype)?;
1,884✔
713

714
        // Return immediately if this is an empty dict
715
        if let Some(length) = length_or_none
1,884✔
716
            && length == 0
1,824✔
717
        {
718
            let container: Bound<'py, PyAny> = if immutable {
420✔
719
                create_frozen_dict(py, Vec::new())?
×
720
            } else {
721
                PyDict::new(py).into_any()
420✔
722
            };
723
            let transformed =
420✔
724
                maybe_call_object_hook(py, container, object_hook.as_ref(), immutable)?;
420✔
725
            return Ok(Value(transformed));
420✔
726
        };
1,464✔
727

728
        let mut key: Option<Bound<'py, PyAny>> = None;
1,464✔
729
        if immutable {
1,464✔
730
            let seen_keys: Option<Bound<'py, PySet>> = if allow_duplicate_keys {
300✔
731
                None
276✔
732
            } else {
733
                Some(PySet::empty(py)?)
24✔
734
            };
735
            let check_duplicate = move |key: &Bound<'py, PyAny>| -> PyResult<()> {
300✔
736
                let seen = seen_keys.as_ref().unwrap();
48✔
737
                if seen.contains(key)? {
48✔
738
                    let repr = key.repr()?;
24✔
739
                    return Err(CBORDecodeError::new_err(format!(
24✔
740
                        "Duplicate map key: {}",
741
                        repr.to_str()?
24✔
742
                    )));
743
                }
24✔
744
                seen.add(key.clone())
24✔
745
            };
48✔
746

747
            let mut items: Vec<(Bound<'py, PyAny>, Bound<'py, PyAny>)> = Vec::new();
300✔
748
            let callback: Box<DecoderCallback<'py>> = if let Some(length) = length_or_none {
300✔
749
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
396✔
750
                    if let Some(key) = key.take() {
396✔
751
                        if !allow_duplicate_keys {
192✔
752
                            check_duplicate(&key)?;
24✔
753
                        }
168✔
754
                        items.push((key, item));
180✔
755
                        if items.len() == length {
180✔
756
                            let transformed = maybe_call_object_hook(
144✔
757
                                py,
144✔
758
                                create_frozen_dict(py, take(&mut items))?,
144✔
759
                                object_hook.as_ref(),
144✔
760
                                immutable,
144✔
761
                            )?;
×
762
                            return Ok(CompleteFrame(transformed));
144✔
763
                        }
36✔
764
                        Ok(ContinueFrame(true))
36✔
765
                    } else {
766
                        key = Some(item);
204✔
767
                        Ok(ContinueFrame(false))
204✔
768
                    }
769
                })
396✔
770
            } else {
771
                let break_marker = BREAK_MARKER.get(py).unwrap().bind(py);
12✔
772
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
48✔
773
                    if item.is(break_marker) {
48✔
774
                        let container = create_frozen_dict(py, take(&mut items))?;
×
775
                        let transformed = maybe_call_object_hook(
×
776
                            py,
×
777
                            container.into_any(),
×
778
                            object_hook.as_ref(),
×
779
                            immutable,
×
780
                        )?;
×
781
                        Ok(CompleteFrame(transformed))
×
782
                    } else if let Some(key) = key.take() {
48✔
783
                        if !allow_duplicate_keys {
24✔
784
                            check_duplicate(&key)?;
24✔
785
                        }
×
786
                        items.push((key, item));
12✔
787
                        Ok(ContinueFrame(true))
12✔
788
                    } else {
789
                        key = Some(item);
24✔
790
                        Ok(ContinueFrame(false))
24✔
791
                    }
792
                })
48✔
793
            };
794
            Ok(BeginFrame(callback, true, None, DisplayName::String("map")))
300✔
795
        } else {
796
            fn check_duplicate(key: &Bound<PyAny>, dict: &Bound<PyDict>) -> PyResult<()> {
48✔
797
                if dict.contains(key)? {
48✔
798
                    let repr = key.repr()?;
24✔
799
                    return Err(CBORDecodeError::new_err(format!(
24✔
800
                        "Duplicate map key: {}",
801
                        repr.to_str()?
24✔
802
                    )));
803
                }
24✔
804
                Ok(())
24✔
805
            }
48✔
806

807
            let mut dict = PyDict::new(py);
1,164✔
808
            let container = dict.clone().into_any();
1,164✔
809
            let callback: Box<DecoderCallback<'py>> = if let Some(length) = length_or_none {
1,164✔
810
                let mut count = 0usize;
1,116✔
811
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
3,544✔
812
                    if let Some(key) = key.take() {
3,544✔
813
                        if !allow_duplicate_keys {
1,772✔
814
                            check_duplicate(&key, &dict)?;
24✔
815
                        }
1,748✔
816
                        dict.set_item(&key, item)?;
1,760✔
817
                        count += 1;
1,760✔
818
                        if count == length {
1,760✔
819
                            let dict = replace(&mut dict, PyDict::new(py));
1,080✔
820
                            let transformed = maybe_call_object_hook(
1,080✔
821
                                py,
1,080✔
822
                                dict.into_any(),
1,080✔
823
                                object_hook.as_ref(),
1,080✔
824
                                immutable,
1,080✔
825
                            )?;
12✔
826
                            return Ok(CompleteFrame(transformed));
1,068✔
827
                        }
680✔
828
                        Ok(ContinueFrame(true))
680✔
829
                    } else {
830
                        key = Some(item);
1,772✔
831
                        Ok(ContinueFrame(false))
1,772✔
832
                    }
833
                })
3,544✔
834
            } else {
835
                let break_marker = BREAK_MARKER.get(py).unwrap().bind(py);
48✔
836
                Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
204✔
837
                    if item.is(break_marker) {
204✔
838
                        let dict = replace(&mut dict, PyDict::new(py));
36✔
839
                        let transformed = maybe_call_object_hook(
36✔
840
                            py,
36✔
841
                            dict.into_any(),
36✔
842
                            object_hook.as_ref(),
36✔
843
                            immutable,
36✔
844
                        )?;
×
845
                        Ok(CompleteFrame(transformed))
36✔
846
                    } else if let Some(key) = key.take() {
168✔
847
                        if !allow_duplicate_keys {
84✔
848
                            check_duplicate(&key, &dict)?;
24✔
849
                        }
60✔
850
                        dict.set_item(&key, item)?;
72✔
851
                        Ok(ContinueFrame(true))
72✔
852
                    } else {
853
                        key = Some(item);
84✔
854
                        Ok(ContinueFrame(false))
84✔
855
                    }
856
                })
204✔
857
            };
858
            Ok(BeginFrame(
1,164✔
859
                callback,
1,164✔
860
                true,
1,164✔
861
                Some(container),
1,164✔
862
                DisplayName::String("map"),
1,164✔
863
            ))
1,164✔
864
        }
865
    }
1,884✔
866

867
    fn decode_semantic<'py>(
3,052✔
868
        &mut self,
3,052✔
869
        py: Python<'py>,
3,052✔
870
        subtype: u8,
3,052✔
871
        immutable: bool,
3,052✔
872
    ) -> PyResult<DecoderResult<'py>> {
3,052✔
873
        let tagnum = self.decode_length_finite(py, subtype)?;
3,052✔
874
        if let Some(semantic_decoders) = &self.semantic_decoders {
3,052✔
875
            match semantic_decoders.bind(py).get_item(tagnum) {
120✔
876
                Ok(decoder) => {
96✔
877
                    let name = decoder.getattr_opt(intern!(py, NAME_ATTR))?;
96✔
878

879
                    // If these attributes are present, this callable was decorated with
880
                    // @shareable_decoder
881
                    return if let Some(name) = name {
96✔
882
                        let require_immutable: bool = decoder
60✔
883
                            .getattr_opt(intern!(py, IMMUTABLE_ATTR))?
60✔
884
                            .map(|x| x.is_truthy())
60✔
885
                            .transpose()?
60✔
886
                            .unwrap_or(false);
60✔
887
                        let retval = decoder.call1((immutable,))?;
60✔
888
                        let tuple: Bound<'_, PyTuple> = retval.cast_into()?;
60✔
889
                        if tuple.len() != 2 {
60✔
890
                            return Err(CBORDecodeError::new_err(format!(
×
891
                                "{decoder} returned a tuple of {} items, expected 2",
×
892
                                tuple.len()
×
893
                            )));
×
894
                        }
60✔
895
                        let container: Bound<'_, PyAny> = tuple.get_item(0)?.cast_into()?;
60✔
896
                        let callback: Bound<'_, PyAny> = tuple.get_item(1)?.cast_into()?;
60✔
897
                        Ok(BeginFrame(
898
                            Box::new(
60✔
899
                                move |item, _immutable: bool| -> PyResult<DecoderResult<'py>> {
60✔
900
                                    callback.call1((item,)).map(CompleteFrame)
60✔
901
                                },
60✔
902
                            ),
903
                            require_immutable,
60✔
904
                            if container.is_none() {
60✔
905
                                None
48✔
906
                            } else {
907
                                Some(container)
12✔
908
                            },
909
                            if name.is_none() {
60✔
910
                                DisplayName::SemanticTag(tagnum)
12✔
911
                            } else {
912
                                DisplayName::PythonName(name.clone())
48✔
913
                            },
914
                        ))
915
                    } else {
916
                        let callback =
36✔
917
                            move |item, new_immutable: bool| -> PyResult<DecoderResult<'py>> {
36✔
918
                                decoder.call1((item, new_immutable)).map(CompleteFrame)
36✔
919
                            };
36✔
920
                        Ok(BeginFrame(
36✔
921
                            Box::new(callback),
36✔
922
                            immutable,
36✔
923
                            None,
36✔
924
                            DisplayName::SemanticTag(tagnum),
36✔
925
                        ))
36✔
926
                    };
927
                }
928
                Err(e) if e.is_instance_of::<PyLookupError>(py) => {}
24✔
929
                Err(e) => return Err(e),
×
930
            }
931
        };
2,932✔
932

933
        // No semantic decoder lookup map – fall back to the hard coded switchboard
934
        let (callback, typename): (Box<DecoderCallback<'py>>, &str) = match tagnum {
2,956✔
935
            0 => (
604✔
936
                Box::new(Self::decode_datetime_string),
604✔
937
                "string-form datetime",
604✔
938
            ),
604✔
939
            1 => (Box::new(Self::decode_epoch_datetime), "epoch-form datetime"),
60✔
940
            2 => (Box::new(Self::decode_positive_bignum), "positive bignum"),
84✔
941
            3 => (Box::new(Self::decode_negative_bignum), "negative bignum"),
36✔
942
            4 => (Box::new(Self::decode_fraction), "decimal fraction"),
112✔
943
            5 => (Box::new(Self::decode_bigfloat), "bigfloat"),
24✔
944
            25 => (Box::new(Self::decode_stringref), "string reference"),
108✔
945
            28 => return Ok(Shareable),
240✔
946
            29 => (Box::new(Self::decode_sharedref), "shared reference"),
180✔
947
            30 => (Box::new(Self::decode_rational), "rational"),
100✔
948
            35 => (Box::new(Self::decode_regexp), "regular expression"),
36✔
949
            36 => (Box::new(Self::decode_mime), "MIME message"),
36✔
950
            37 => (Box::new(Self::decode_uuid), "UUID"),
200✔
951
            52 => (Box::new(Self::decode_ipv4), "IPv4 address"),
160✔
952
            54 => (Box::new(Self::decode_ipv6), "IPv6 address"),
96✔
953
            100 => (Box::new(Self::decode_epoch_date), "epoch-form date"),
12✔
954
            256 => return Ok(StringNamespace),
48✔
955
            258 => return self.decode_set(py, immutable),
268✔
956
            260 => (Box::new(Self::decode_ipaddress), "IP address"),
84✔
957
            261 => (Box::new(Self::decode_ipnetwork), "IP network"),
84✔
958
            1004 => (Box::new(Self::decode_date_string), "string-form date"),
12✔
959
            43000 => (Box::new(Self::decode_complex), "complex number"),
252✔
960
            55799 => (
24✔
961
                Box::new(Self::decode_self_describe_cbor),
24✔
962
                "self-described CBOR value",
24✔
963
            ),
24✔
964
            _ => {
965
                // For a tag with no designated decoder, check if we have a tag hook, and call
966
                // that with the tag object, using its return value as the decoded value.
967
                let tag = CBORTag::new(tagnum.into_bound_py_any(py)?, py.None().into_bound(py))?;
96✔
968
                let bound_tag = Bound::new(py, tag)?.into_any();
96✔
969
                let container = bound_tag.clone();
96✔
970
                let mut tag_hook = self
96✔
971
                    .tag_hook
96✔
972
                    .as_ref()
96✔
973
                    .map(|hook| hook.clone_ref(py).into_bound(py));
96✔
974
                let callback = Box::new(move |item: Bound<'py, PyAny>, _immutable: bool| {
96✔
975
                    let tag: &Bound<'py, CBORTag> = bound_tag.cast()?;
84✔
976
                    tag.borrow_mut().value = item.unbind();
84✔
977
                    if let Some(tag_hook) = tag_hook.take() {
84✔
978
                        tag_hook.call1((&bound_tag, immutable)).map(CompleteFrame)
60✔
979
                    } else {
980
                        Ok(CompleteFrame(bound_tag.clone()))
24✔
981
                    }
982
                });
84✔
983
                return Ok(BeginFrame(
96✔
984
                    callback,
96✔
985
                    true,
96✔
986
                    Some(container),
96✔
987
                    DisplayName::SemanticTag(tagnum),
96✔
988
                ));
96✔
989
            }
990
        };
991
        Ok(BeginFrame(
2,304✔
992
            callback,
2,304✔
993
            true,
2,304✔
994
            None,
2,304✔
995
            DisplayName::String(typename),
2,304✔
996
        ))
2,304✔
997
    }
3,052✔
998

999
    fn decode_special<'py>(
8,020✔
1000
        &mut self,
8,020✔
1001
        py: Python<'py>,
8,020✔
1002
        subtype: u8,
8,020✔
1003
    ) -> PyResult<DecoderResult<'py>> {
8,020✔
1004
        // Major tag 7
1005
        match subtype {
8,020✔
1006
            0..20 => {
8,020✔
1007
                let value = subtype.into_pyobject(py)?;
72✔
1008
                CBORSimpleValue::new(value)?.into_bound_py_any(py)
72✔
1009
            }
1010
            20 => Ok(false.into_bound_py_any(py)?),
128✔
1011
            21 => Ok(true.into_bound_py_any(py)?),
180✔
1012
            22 => Ok(py.None().into_bound_py_any(py)?),
580✔
1013
            23 => Ok(UNDEFINED.get(py).unwrap().into_bound_py_any(py)?),
24✔
1014
            24 => {
1015
                let value = self.read_exact::<1>(py)?[0];
84✔
1016
                if value < 0x20 {
84✔
1017
                    return Err(CBORDecodeError::new_err(
36✔
1018
                        "invalid two-byte sequence for simple value",
36✔
1019
                    ));
36✔
1020
                }
48✔
1021
                CBORSimpleValue::new(value.into_pyobject(py)?)?.into_bound_py_any(py)
48✔
1022
            }
1023
            25 => {
1024
                let bytes = self.read_exact::<2>(py)?;
740✔
1025
                f16::from_be_bytes(bytes).to_f32().into_bound_py_any(py)
740✔
1026
            }
1027
            26 => {
1028
                let bytes = self.read_exact::<4>(py)?;
108✔
1029
                f32::from_be_bytes(bytes).into_bound_py_any(py)
108✔
1030
            }
1031
            27 => {
1032
                let bytes = self.read_exact::<8>(py)?;
5,864✔
1033
                f64::from_be_bytes(bytes).into_bound_py_any(py)
5,864✔
1034
            }
1035
            31 => Ok(BREAK_MARKER.get(py).unwrap().into_bound_py_any(py)?),
204✔
1036
            _ => Err(CBORDecodeError::new_err(format!(
36✔
1037
                "undefined reserved major type 7 subtype 0x{subtype:x}"
36✔
1038
            ))),
36✔
1039
        }
1040
        .map(Value)
7,984✔
1041
    }
8,020✔
1042

1043
    //
1044
    // Decoders for semantic tags (major tag 6)
1045
    //
1046

1047
    fn decode_datetime_string<'py>(
604✔
1048
        value: Bound<'py, PyAny>,
604✔
1049
        _immutable: bool,
604✔
1050
    ) -> PyResult<DecoderResult<'py>> {
604✔
1051
        // Semantic tag 0
1052
        let py = value.py();
604✔
1053
        let value_type = value.get_type();
604✔
1054
        let mut datetime_str: Bound<'py, PyString> = value.cast_into().map_err(|e| {
604✔
1055
            create_exc_from(
×
1056
                py,
×
1057
                CBORDecodeError::new_err(format!(
×
1058
                    "expected string for tag, got {} instead",
1059
                    value_type
1060
                )),
1061
                Some(PyErr::from(e)),
×
1062
            )
1063
        })?;
×
1064

1065
        // Python 3.10 has impaired parsing of the ISO format:
1066
        // * It doesn't handle the standard "Z" suffix
1067
        // * It doesn't handle the fractional seconds part having fewer than 6 digits
1068
        if py.version_info() <= (3, 10) {
604✔
1069
            // Convert Z to +00:00
1070
            let mut temp_str = datetime_str.to_string().replacen("Z", "+00:00", 1);
134✔
1071

1072
            // Pad any microseconds part with zeros
1073
            if let Some((first, second)) = temp_str.split_once('.')
134✔
1074
                && let Some(index) = second.find(|c: char| !c.is_numeric())
681✔
1075
            {
1076
                let (mut micros, tz_part) = second.split_at(index);
93✔
1077
                // Cut off excess zeroes from the start of the microseconds part
1078
                if micros.len() >= 6 {
93✔
1079
                    micros = &micros[..6];
78✔
1080
                }
78✔
1081

1082
                // Reconstitute the datetime string, right-padding the microseconds part
1083
                // with zeroes
1084
                temp_str = format!("{first}.{micros:0<6}{tz_part}");
93✔
1085
            }
41✔
1086

1087
            datetime_str = temp_str.into_pyobject(py)?;
134✔
1088
        }
470✔
1089

1090
        DATETIME_FROMISOFORMAT
604✔
1091
            .get(py)?
604✔
1092
            .call1((&datetime_str,))
604✔
1093
            .map(CompleteFrame)
604✔
1094
    }
604✔
1095

1096
    fn decode_epoch_datetime(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
60✔
1097
        // Semantic tag 1
1098
        let py = value.py();
60✔
1099
        let utc = UTC.get(py)?;
60✔
1100
        DATETIME_FROMTIMESTAMP
60✔
1101
            .get(py)?
60✔
1102
            .call1((value, utc))
60✔
1103
            .map(CompleteFrame)
60✔
1104
    }
60✔
1105

1106
    fn decode_positive_bignum(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
60✔
1107
        // Semantic tag 2
1108
        let py = value.py();
60✔
1109
        INT_FROMBYTES
60✔
1110
            .get(py)?
60✔
1111
            .call1((value, intern!(py, "big")))
60✔
1112
            .map(CompleteFrame)
60✔
1113
    }
60✔
1114

1115
    fn decode_negative_bignum(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
36✔
1116
        // Semantic tag 3
1117
        let py = value.py();
36✔
1118
        let int = INT_FROMBYTES.get(py)?.call1((value, intern!(py, "big")))?;
36✔
1119
        int.neg()?.add(-1).map(CompleteFrame)
36✔
1120
    }
36✔
1121

1122
    fn decode_fraction(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
112✔
1123
        // Semantic tag 4
1124
        let py = value.py();
112✔
1125
        let tuple = require_tuple(value, 2)?;
112✔
1126
        let decimal_class = DECIMAL_TYPE.get(py)?;
100✔
1127
        {
1128
            let exp = tuple.get_item(0)?;
100✔
1129
            let sig_tuple = decimal_class
100✔
1130
                .call1((tuple.get_item(1)?,))?
100✔
1131
                .call_method0(intern!(py, "as_tuple"))?
100✔
1132
                .cast_into::<PyTuple>()?;
100✔
1133
            let sign = sig_tuple.get_item(0)?;
100✔
1134
            let digits = sig_tuple.get_item(1)?;
100✔
1135
            let args_tuple = PyTuple::new(py, [sign, digits, exp])?;
100✔
1136
            decimal_class.call1((args_tuple,)).map(CompleteFrame)
100✔
1137
        }
1138
    }
112✔
1139

1140
    fn decode_bigfloat(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
24✔
1141
        // Semantic tag 5
1142
        let py = value.py();
24✔
1143
        let tuple = require_tuple(value, 2)?;
24✔
1144
        let decimal_class = DECIMAL_TYPE.get(py)?;
12✔
1145
        {
1146
            let exp = decimal_class.call1((tuple.get_item(0)?,))?;
12✔
1147
            let sig = decimal_class.call1((tuple.get_item(1)?,))?;
12✔
1148
            let exp = PyInt::new(py, 2).pow(exp, py.None())?;
12✔
1149
            sig.mul(exp).map(CompleteFrame)
12✔
1150
        }
1151
    }
24✔
1152

1153
    fn decode_stringref(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
108✔
1154
        // Semantic tag 25
1155
        let index: usize = value.extract()?;
108✔
1156
        Ok(StringReference(index))
108✔
1157
    }
108✔
1158

1159
    fn decode_sharedref(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
180✔
1160
        // Semantic tag 29
1161
        let index: usize = value.extract()?;
180✔
1162
        Ok(SharedReference(index))
180✔
1163
    }
180✔
1164

1165
    fn decode_rational(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
100✔
1166
        // Semantic tag 30
1167
        let py = value.py();
100✔
1168
        let tuple = require_tuple(value, 2)?;
100✔
1169
        FRACTION_TYPE.get(py)?.call1(tuple).map(CompleteFrame)
88✔
1170
    }
100✔
1171

1172
    fn decode_regexp(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
36✔
1173
        // Semantic tag 35
1174
        RE_COMPILE
36✔
1175
            .get(value.py())?
36✔
1176
            .call1((value,))
36✔
1177
            .map(CompleteFrame)
36✔
1178
    }
36✔
1179

1180
    fn decode_mime(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
24✔
1181
        // Semantic tag 36
1182
        let py = value.py();
24✔
1183
        let parser = EMAIL_PARSER.get(py)?.call0()?;
24✔
1184
        parser
24✔
1185
            .call_method1(intern!(py, "parsestr"), (value,))
24✔
1186
            .map(CompleteFrame)
24✔
1187
    }
24✔
1188

1189
    fn decode_uuid(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
200✔
1190
        // Semantic tag 37
1191
        let py = value.py();
200✔
1192
        let kwargs = PyDict::new(py);
200✔
1193
        kwargs.set_item(intern!(py, "bytes"), value)?;
200✔
1194
        UUID_TYPE
200✔
1195
            .get(py)?
200✔
1196
            .call((), Some(&kwargs))
200✔
1197
            .map(CompleteFrame)
200✔
1198
    }
200✔
1199

1200
    fn decode_ipv4(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
160✔
1201
        // Semantic tag 52
1202
        let py = value.py();
160✔
1203
        let addr = if let Ok(bytes) = value.cast::<PyBytes>() {
160✔
1204
            // The decoded value was a bytestring, so this is an IPv4 address
1205
            IPV4ADDRESS_TYPE.get(py)?.call1((bytes,))?
136✔
1206
        } else if let Ok(tuple) = value.cast_into::<PyTuple>()
24✔
1207
            && tuple.len() == 2
24✔
1208
        {
1209
            // The decoded value was a 2-item array. Check the types of the elements:
1210
            // (int, bytes) -> network
1211
            // (bytes, int) -> interface
1212
            let first_item = tuple.get_item(0)?;
24✔
1213
            let second_item = tuple.get_item(1)?;
24✔
1214
            if let Ok(prefix) = first_item.cast::<PyInt>()
24✔
1215
                && let Ok(address) = second_item.cast::<PyBytes>()
12✔
1216
            {
1217
                let mut address_vec: Vec<u8> = address.extract()?;
12✔
1218
                address_vec.resize(4, 0);
12✔
1219
                IPV4NETWORK_TYPE.get(py)?.call1(((address_vec, prefix),))?
12✔
1220
            } else if let Ok(address) = first_item.cast::<PyBytes>()
12✔
1221
                && let Ok(prefix) = second_item.cast::<PyInt>()
12✔
1222
            {
1223
                IPV4INTERFACE_TYPE.get(py)?.call1(((address, prefix),))?
12✔
1224
            } else {
1225
                return Err(CBORDecodeError::new_err("invalid types in input array"));
×
1226
            }
1227
        } else {
1228
            return Err(CBORDecodeError::new_err(
×
1229
                "input value must be a bytestring or an array of 2 elements",
×
1230
            ));
×
1231
        };
1232
        Ok(CompleteFrame(addr))
160✔
1233
    }
160✔
1234

1235
    fn decode_ipv6(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
96✔
1236
        // Semantic tag 54
1237
        let py = value.py();
96✔
1238
        let ipv6addr_class = IPV6ADDRESS_TYPE.get(py)?;
96✔
1239
        let addr = if let Ok(bytes) = value.cast::<PyBytes>() {
96✔
1240
            // The decoded value was a bytestring, so this is an IPv6 address
1241
            ipv6addr_class.call1((bytes,))?
60✔
1242
        } else if let Ok(tuple) = value.cast_into::<PyTuple>()
36✔
1243
            && (2..=3).contains(&tuple.len())
36✔
1244
        {
1245
            // The decoded value was a 2-item (or 3 with zone ID) array.
1246
            // Check the types of the elements:
1247
            // (int, bytes) -> network
1248
            // (bytes, int) -> interface
1249
            let first_item = tuple.get_item(0)?;
36✔
1250
            let second_item = tuple.get_item(1)?;
36✔
1251
            let zone_id = tuple.get_item(2).ok();
36✔
1252
            let (class, addr_bytes, prefix) = if let Ok(prefix) = first_item.cast::<PyInt>()
36✔
1253
                && let Ok(address) = second_item.cast::<PyBytes>()
12✔
1254
            {
1255
                let mut address_vec: Vec<u8> = address.extract()?;
12✔
1256
                address_vec.resize(16, 0);
12✔
1257
                Ok((
1258
                    IPV6NETWORK_TYPE.get(py)?,
12✔
1259
                    PyBytes::new(py, address_vec.as_slice()),
12✔
1260
                    prefix,
12✔
1261
                ))
1262
            } else if let Ok(address) = first_item.cast_into::<PyBytes>()
24✔
1263
                && let Ok(prefix) = second_item.cast::<PyInt>()
24✔
1264
            {
1265
                Ok((IPV6INTERFACE_TYPE.get(py)?, address, prefix))
24✔
1266
            } else {
1267
                Err(CBORDecodeError::new_err("invalid types in input array"))
×
1268
            }?;
×
1269
            let addr_obj = ipv6addr_class.call1((addr_bytes,))?;
36✔
1270

1271
            // Format the zone ID suffix if a zone ID was included
1272
            // (bytes or integer as the last item of a 3-tuple)
1273
            let zone_id_suffix = if let Some(zone_id) = zone_id {
36✔
1274
                if let Ok(zone_id_bytes) = zone_id.cast::<PyBytes>() {
24✔
1275
                    let zone_id_str = String::from_utf8(zone_id_bytes.as_bytes().to_vec())?;
12✔
1276
                    format!("%{zone_id_str}")
12✔
1277
                } else if let Ok(zone_id_int) = zone_id.cast::<PyInt>() {
12✔
1278
                    format!("%{zone_id_int}")
12✔
1279
                } else {
1280
                    return Err(CBORDecodeError::new_err(
×
1281
                        "zone ID must be an integer or a bytestring",
×
1282
                    ));
×
1283
                }
1284
            } else {
1285
                String::default()
12✔
1286
            };
1287

1288
            let formatted_addr = format!("{addr_obj}{zone_id_suffix}/{prefix}");
36✔
1289
            class.call1((formatted_addr,))?
36✔
1290
        } else {
1291
            return Err(CBORDecodeError::new_err(
×
1292
                "input value must be a bytestring or an array of 2 elements",
×
1293
            ));
×
1294
        };
1295
        Ok(CompleteFrame(addr))
96✔
1296
    }
96✔
1297

1298
    fn decode_epoch_date(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
12✔
1299
        // Semantic tag 100
1300
        let py = value.py();
12✔
1301
        let value = value.extract::<i32>()? + 719163;
12✔
1302
        DATE_FROMORDINAL.get(py)?.call1((value,)).map(CompleteFrame)
12✔
1303
    }
12✔
1304

1305
    fn decode_ipaddress(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
84✔
1306
        // Semantic tag 260 (deprecated)
1307
        let py = value.py();
84✔
1308
        let value = value.cast_into::<PyBytes>()?;
84✔
1309
        let addr_obj = match value.len()? {
72✔
1310
            4 | 16 => IPADDRESS_FUNC.get(py)?.call1((value,)),
48✔
1311
            6 => Ok(Bound::new(py, CBORTag::new_internal(260, value.into_any()))?.into_any()), // MAC address
12✔
1312
            length => Err(CBORDecodeError::new_err(format!(
12✔
1313
                "invalid IP address length ({length})"
12✔
1314
            ))),
12✔
1315
        }?;
12✔
1316
        Ok(CompleteFrame(addr_obj))
60✔
1317
    }
84✔
1318

1319
    fn decode_ipnetwork<'py>(
84✔
1320
        value: Bound<'py, PyAny>,
84✔
1321
        _immutable: bool,
84✔
1322
    ) -> PyResult<DecoderResult<'py>> {
84✔
1323
        // Semantic tag 261 (deprecated)
1324
        let py = value.py();
84✔
1325
        let value: Bound<'py, PyMapping> = value.cast_into()?;
84✔
1326
        let length = value.len()?;
84✔
1327
        if length != 1 {
84✔
1328
            return Err(CBORDecodeError::new_err(format!(
12✔
1329
                "invalid input map length for IP network: {}",
12✔
1330
                length
12✔
1331
            )));
12✔
1332
        }
72✔
1333
        let first_item = value.items()?.get_item(0)?;
72✔
1334
        let mask_length = first_item.get_item(1)?;
72✔
1335
        if !mask_length.is_exact_instance_of::<PyInt>() {
72✔
1336
            return Err(CBORDecodeError::new_err(format!(
12✔
1337
                "invalid mask length for IP network: {mask_length}"
12✔
1338
            )));
12✔
1339
        }
60✔
1340

1341
        let addr_obj = match IPNETWORK_FUNC.get(py)?.call1((&first_item,)) {
60✔
1342
            Ok(ip_network) => Ok(ip_network),
48✔
1343
            Err(e) => {
12✔
1344
                // A CompleteFrameError may indicate that the bytestring has host bits set, so try parsing
1345
                // it as an IP interface instead
1346
                if e.is_instance_of::<PyValueError>(py) {
12✔
1347
                    IPINTERFACE_FUNC.get(py)?.call1((first_item,))
12✔
1348
                } else {
1349
                    Err(e)
×
1350
                }
1351
            }
1352
        }?;
×
1353
        Ok(CompleteFrame(addr_obj))
60✔
1354
    }
84✔
1355

1356
    fn decode_date_string(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
12✔
1357
        // Semantic tag 1004
1358
        let py = value.py();
12✔
1359
        let date = DATE_FROMISOFORMAT.get(py)?.call1((value,))?;
12✔
1360
        Ok(CompleteFrame(date))
12✔
1361
    }
12✔
1362

1363
    fn decode_complex(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
252✔
1364
        // Semantic tag 43000
1365
        let py = value.py();
252✔
1366
        let tuple = require_tuple(value, 2)?;
252✔
1367
        let real: f64 = tuple.get_item(0)?.extract()?;
252✔
1368
        let imag: f64 = tuple.get_item(1)?.extract()?;
252✔
1369
        Ok(CompleteFrame(
252✔
1370
            PyComplex::from_doubles(py, real, imag).into_any(),
252✔
1371
        ))
252✔
1372
    }
252✔
1373

1374
    fn decode_self_describe_cbor(value: Bound<PyAny>, _immutable: bool) -> PyResult<DecoderResult> {
24✔
1375
        // Semantic tag 55799
1376
        Ok(CompleteFrame(value))
24✔
1377
    }
24✔
1378

1379
    fn decode_set<'py>(
268✔
1380
        &mut self,
268✔
1381
        py: Python<'py>,
268✔
1382
        immutable: bool,
268✔
1383
    ) -> PyResult<DecoderResult<'py>> {
268✔
1384
        // Semantic tag 258
1385
        let mut set_or_none = if immutable {
268✔
1386
            None
36✔
1387
        } else {
1388
            Some(PySet::empty(py)?.into_any())
232✔
1389
        };
1390
        let container = set_or_none.clone();
268✔
1391
        let callback = move |item: Bound<'py, PyAny>, _immutable: bool| {
268✔
1392
            let container: Bound<'py, PyAny> = if let Some(set) = set_or_none.take() {
256✔
1393
                set.call_method1(intern!(py, "update"), (item,))?;
220✔
1394
                set.into_any()
220✔
1395
            } else {
1396
                let tuple = item.cast_into::<PyTuple>()?;
36✔
1397
                PyFrozenSet::new(py, tuple)?.into_any()
36✔
1398
            };
1399
            Ok(CompleteFrame(container))
256✔
1400
        };
256✔
1401
        Ok(BeginFrame(
268✔
1402
            Box::new(callback),
268✔
1403
            true,
268✔
1404
            container,
268✔
1405
            DisplayName::String("set"),
268✔
1406
        ))
268✔
1407
    }
268✔
1408
}
1409

1410
#[pymethods]
×
1411
impl CBORDecoder {
1412
    #[new]
1413
    #[pyo3(signature = (
1414
        fp,
1415
        *,
1416
        tag_hook = None,
1417
        object_hook = None,
1418
        array_hook = None,
1419
        semantic_decoders = None,
1420
        str_errors = "strict",
1421
        read_size = 4096,
1422
        max_depth = 400,
1423
        allow_indefinite = true,
1424
        allow_duplicate_keys = true,
1425
    ))]
1426
    pub fn new(
480✔
1427
        py: Python<'_>,
480✔
1428
        fp: &Bound<'_, PyAny>,
480✔
1429
        tag_hook: Option<&Bound<'_, PyAny>>,
480✔
1430
        object_hook: Option<&Bound<'_, PyAny>>,
480✔
1431
        array_hook: Option<&Bound<'_, PyAny>>,
480✔
1432
        semantic_decoders: Option<&Bound<'_, PyMapping>>,
480✔
1433
        str_errors: &str,
480✔
1434
        read_size: usize,
480✔
1435
        max_depth: usize,
480✔
1436
        allow_indefinite: bool,
480✔
1437
        allow_duplicate_keys: bool,
480✔
1438
    ) -> PyResult<Self> {
480✔
1439
        Self::new_internal(
480✔
1440
            py,
480✔
1441
            Some(fp),
480✔
1442
            None,
480✔
1443
            tag_hook,
480✔
1444
            object_hook,
480✔
1445
            array_hook,
480✔
1446
            semantic_decoders,
480✔
1447
            str_errors,
480✔
1448
            read_size,
480✔
1449
            max_depth,
480✔
1450
            allow_indefinite,
480✔
1451
            allow_duplicate_keys,
480✔
1452
        )
1453
    }
480✔
1454

1455
    #[getter]
1456
    fn fp(&self, py: Python<'_>) -> Option<Py<PyAny>> {
×
1457
        self.fp.as_ref().map(|fp| fp.clone_ref(py))
×
UNCOV
1458
    }
×
1459

1460
    #[setter]
1461
    fn set_fp(&mut self, fp: &Bound<'_, PyAny>) -> PyResult<()> {
492✔
1462
        let result = fp.call_method0("readable");
492✔
1463
        if let Ok(readable) = &result
492✔
1464
            && readable.is_truthy()?
480✔
1465
        {
1466
            self.fp_is_seekable = fp.call_method0("seekable")?.is_truthy()?;
468✔
1467
            let fp = fp.clone();
468✔
1468
            self.read_method = Some(fp.getattr("read")?.unbind());
468✔
1469
            self.fp = Some(fp.unbind());
468✔
1470
            self.available_bytes = 0;
468✔
1471
            self.read_position = 0;
468✔
1472
            self.buffer = None;
468✔
1473
            Ok(())
468✔
1474
        } else {
1475
            raise_exc_from(
24✔
1476
                fp.py(),
24✔
1477
                PyValueError::new_err("fp must be a readable file-like object"),
24✔
1478
                result.err(),
24✔
1479
            )
1480
        }
1481
    }
492✔
1482

1483
    #[getter]
1484
    fn tag_hook(&self, py: Python<'_>) -> Option<Py<PyAny>> {
12✔
1485
        self.tag_hook
12✔
1486
            .as_ref()
12✔
1487
            .map(|tag_hook| tag_hook.clone_ref(py))
12✔
1488
    }
12✔
1489

1490
    #[setter]
1491
    fn set_tag_hook(&mut self, tag_hook: Option<&Bound<'_, PyAny>>) -> PyResult<()> {
4,716✔
1492
        if let Some(tag_hook) = tag_hook {
4,716✔
1493
            if !tag_hook.is_callable() {
132✔
1494
                return Err(PyErr::new::<PyTypeError, _>(
12✔
1495
                    "tag_hook must be callable or None",
12✔
1496
                ));
12✔
1497
            }
120✔
1498

1499
            self.tag_hook = Some(tag_hook.clone().unbind());
120✔
1500
        } else {
4,584✔
1501
            self.tag_hook = None;
4,584✔
1502
        }
4,584✔
1503
        Ok(())
4,704✔
1504
    }
4,716✔
1505

1506
    #[getter]
1507
    fn object_hook(&self, py: Python<'_>) -> Option<Py<PyAny>> {
12✔
1508
        self.object_hook
12✔
1509
            .as_ref()
12✔
1510
            .map(|object_hook| object_hook.clone_ref(py))
12✔
1511
    }
12✔
1512

1513
    #[setter]
1514
    fn set_object_hook(&mut self, object_hook: Option<&Bound<'_, PyAny>>) -> PyResult<()> {
4,704✔
1515
        if let Some(object_hook) = object_hook {
4,704✔
1516
            if !object_hook.is_callable() {
48✔
1517
                return Err(PyErr::new::<PyTypeError, _>(
12✔
1518
                    "object_hook must be callable or None",
12✔
1519
                ));
12✔
1520
            }
36✔
1521

1522
            self.object_hook = Some(object_hook.clone().unbind());
36✔
1523
        } else {
4,656✔
1524
            self.object_hook = None;
4,656✔
1525
        }
4,656✔
1526
        Ok(())
4,692✔
1527
    }
4,704✔
1528

1529
    #[getter]
1530
    fn array_hook(&self, py: Python<'_>) -> Option<Py<PyAny>> {
12✔
1531
        self.array_hook
12✔
1532
            .as_ref()
12✔
1533
            .map(|array_hook| array_hook.clone_ref(py))
12✔
1534
    }
12✔
1535

1536
    #[setter]
1537
    fn set_array_hook(&mut self, array_hook: Option<&Bound<'_, PyAny>>) -> PyResult<()> {
4,692✔
1538
        if let Some(array_hook) = array_hook {
4,692✔
1539
            if !array_hook.is_callable() {
108✔
1540
                return Err(PyErr::new::<PyTypeError, _>(
12✔
1541
                    "array_hook must be callable or None",
12✔
1542
                ));
12✔
1543
            }
96✔
1544

1545
            self.array_hook = Some(array_hook.clone().unbind());
96✔
1546
        } else {
4,584✔
1547
            self.array_hook = None;
4,584✔
1548
        }
4,584✔
1549
        Ok(())
4,680✔
1550
    }
4,692✔
1551

1552
    #[getter]
1553
    fn str_errors(&self, py: Python<'_>) -> Py<PyString> {
60✔
1554
        if let Some(str_errors) = self.str_errors.as_ref() {
60✔
1555
            str_errors.clone_ref(py)
48✔
1556
        } else {
1557
            intern!(py, "strict").clone().unbind()
12✔
1558
        }
1559
    }
60✔
1560

1561
    #[setter]
1562
    fn set_str_errors(&mut self, str_errors: &Bound<'_, PyString>) -> PyResult<()> {
4,680✔
1563
        let as_string: &str = str_errors.extract()?;
4,680✔
1564
        self.str_errors = match as_string {
4,680✔
1565
            "strict" => None,
4,680✔
1566
            "ignore" | "replace" | "backslashreplace" | "surrogateescape" => {
108✔
1567
                Some(str_errors.clone().unbind())
96✔
1568
            }
1569
            _ => {
1570
                return Err(PyValueError::new_err(format!(
12✔
1571
                    "invalid str_errors value: '{str_errors}'"
12✔
1572
                )));
12✔
1573
            }
1574
        };
1575
        Ok(())
4,668✔
1576
    }
4,680✔
1577

1578
    /// Read bytes from the data stream.
1579
    ///
1580
    /// :param amount: the number of bytes to read
1581
    #[pyo3(signature = (amount, /))]
1582
    fn read(&mut self, py: Python<'_>, amount: usize) -> PyResult<Vec<u8>> {
3,836✔
1583
        if amount == 0 {
3,836✔
1584
            return Ok(Vec::default());
224✔
1585
        }
3,612✔
1586

1587
        if self.available_bytes == 0 {
3,612✔
1588
            // No buffer
1589
            let (new_bytes, amount_read) = self.read_from_fp(py, amount)?;
72✔
1590
            self.read_position = amount;
12✔
1591
            self.available_bytes = amount_read - amount;
12✔
1592
            let new_buffer = new_bytes.as_bytes()[..amount].to_vec();
12✔
1593
            self.buffer = Some(new_bytes.unbind());
12✔
1594
            Ok(new_buffer)
12✔
1595
        } else if self.available_bytes < amount {
3,540✔
1596
            // Combine the remnants of the partial buffer with new data read from the file
1597
            let needed_bytes = amount - self.available_bytes;
96✔
1598
            let mut concatenated_buffer: Vec<u8> =
96✔
1599
                self.buffer.take().unwrap().as_bytes(py)[self.read_position..].to_vec();
96✔
1600
            let (new_bytes, amount_read) = self.read_from_fp(py, needed_bytes)?;
96✔
1601
            concatenated_buffer.extend_from_slice(&new_bytes[..needed_bytes]);
24✔
1602
            self.buffer = Some(new_bytes.unbind());
24✔
1603
            self.available_bytes = amount_read - needed_bytes;
24✔
1604
            self.read_position = needed_bytes;
24✔
1605
            Ok(concatenated_buffer)
24✔
1606
        } else {
1607
            // Return a slice from the existing bytes object
1608
            let vec = self.buffer.as_ref().unwrap().as_bytes(py)
3,444✔
1609
                [self.read_position..self.read_position + amount]
3,444✔
1610
                .to_vec();
3,444✔
1611
            self.available_bytes -= amount;
3,444✔
1612
            self.read_position += amount;
3,444✔
1613
            Ok(vec)
3,444✔
1614
        }
1615
    }
3,836✔
1616

1617
    /// Decode the next value from the stream.
1618
    ///
1619
    /// :param immutable: if :data:`True`, decode the next item as an immutable type
1620
    ///     (e.g. :class:`tuple` instead of a :class:`list`), if possible
1621
    /// :return: the decoded object
1622
    /// :raises CBORDecodeError: if there is any problem decoding the stream
1623
    #[pyo3(signature = (*, immutable = false))]
1624
    pub fn decode<'py>(&mut self, py: Python<'py>, immutable: bool) -> PyResult<Bound<'py, PyAny>> {
4,524✔
1625
        let mut frames: Vec<StackFrame> = Vec::new();
4,524✔
1626

1627
        fn add_frame<'a>(
11,536✔
1628
            frames: &mut Vec<StackFrame<'a>>,
11,536✔
1629
            max_depth: usize,
11,536✔
1630
            frame: StackFrame<'a>,
11,536✔
1631
        ) -> PyResult<()> {
11,536✔
1632
            if frames.len() == max_depth {
11,536✔
1633
                return Err(CBORDecodeError::new_err(format!(
24✔
1634
                    "maximum container nesting depth ({max_depth}) exceeded",
24✔
1635
                )));
24✔
1636
            }
11,512✔
1637

1638
            frames.push(frame);
11,512✔
1639
            Ok(())
11,512✔
1640
        }
11,536✔
1641

1642
        fn wrap_exception(py: Python<'_>, err: PyErr, typename: &DisplayName) -> PyErr {
648✔
1643
            if err.is_instance_of::<CBORDecodeEOF>(py) {
648✔
1644
                err
120✔
1645
            } else if err.is_instance_of::<CBORDecodeError>(py) {
528✔
1646
                CBORDecodeError::new_err(format!(
276✔
1647
                    "error decoding {}: {}",
1648
                    typename,
1649
                    err.arguments(py)
276✔
1650
                ))
1651
            } else {
1652
                create_exc_from(
252✔
1653
                    py,
252✔
1654
                    CBORDecodeError::new_err(format!("error decoding {}", typename)),
252✔
1655
                    Some(err),
252✔
1656
                )
1657
            }
1658
        }
648✔
1659

1660
        let mut shareables: Vec<Option<Bound<'py, PyAny>>> = Vec::new();
4,524✔
1661
        let mut string_namespaces: Vec<Vec<Bound<'py, PyAny>>> = Vec::new();
4,524✔
1662
        let mut value: Option<Bound<'py, PyAny>> = None;
4,524✔
1663
        let mut current_immutable: bool = immutable;
4,524✔
1664
        loop {
1665
            let result: PyResult<DecoderResult<'py>> = if let Some(previous_value) = value.take() {
45,568✔
1666
                // Call the decoder callback of the last frame
1667
                let frame = frames.last_mut().unwrap();
17,816✔
1668
                if let Some(decoder_callback) = frame.decoder_callback.as_mut() {
17,816✔
1669
                    decoder_callback(previous_value, frame.immutable)
17,720✔
1670
                        .map_err(|e| wrap_exception(py, e, &frame.typename))
17,720✔
1671
                } else if frame.contains_string_namespace {
96✔
1672
                    string_namespaces
24✔
1673
                        .pop()
24✔
1674
                        .expect("no string namespaces to pop from");
24✔
1675
                    Ok(CompleteFrame(previous_value))
24✔
1676
                } else if let Some(shareable_index) = frame.shareable_index {
72✔
1677
                    shareables[shareable_index].get_or_insert_with(|| previous_value.clone());
72✔
1678
                    Ok(CompleteFrame(previous_value))
72✔
1679
                } else {
1680
                    panic!("no decoder callback, shareable index or string namespace");
×
1681
                }
1682
            } else {
1683
                let (major_type, subtype) = self.read_major_and_subtype(py)?;
27,752✔
1684
                match major_type {
27,728✔
1685
                    0 => self.decode_uint(py, subtype),
3,456✔
1686
                    1 => self.decode_negint(py, subtype),
380✔
1687
                    2 => self.decode_bytestring(py, subtype),
1,212✔
1688
                    3 => self.decode_string(py, subtype),
2,516✔
1689
                    4 => self.decode_array(py, subtype, current_immutable),
7,208✔
1690
                    5 => self.decode_map(py, subtype, current_immutable),
1,884✔
1691
                    6 => self.decode_semantic(py, subtype, current_immutable),
3,052✔
1692
                    7 => self.decode_special(py, subtype),
8,020✔
1693
                    _ => Err(CBORDecodeError::new_err(format!(
×
1694
                        "invalid major type: {major_type}"
×
1695
                    ))),
×
1696
                }
1697
                .map_err(|e| {
27,728✔
1698
                    let typename = match major_type {
360✔
1699
                        0 => "unsigned integer",
12✔
1700
                        1 => "negative integer",
×
1701
                        2 => "byte string",
108✔
1702
                        3 => "text string",
168✔
1703
                        4 => "array",
×
1704
                        5 => "map",
×
1705
                        6 => "semantic tag",
×
1706
                        7 => "special value",
72✔
1707
                        _ => unreachable!("invalid major types should have been handled earlier"),
×
1708
                    };
1709
                    wrap_exception(py, e, &DisplayName::String(typename))
360✔
1710
                })
360✔
1711
            };
1712

1713
            match result {
44,896✔
1714
                Ok(BeginFrame(callback, requested_immutable, container, typename)) => {
11,248✔
1715
                    if let Some(frame) = frames.last_mut()
11,248✔
1716
                        && let Some(container) = container
8,572✔
1717
                        && let Some(shareable_index) = frame.shareable_index
6,064✔
1718
                    {
156✔
1719
                        frames.pop();
156✔
1720
                        shareables[shareable_index] = Some(container.clone());
156✔
1721
                    }
11,092✔
1722
                    current_immutable = current_immutable || requested_immutable;
11,248✔
1723
                    add_frame(
11,248✔
1724
                        &mut frames,
11,248✔
1725
                        self.max_depth,
11,248✔
1726
                        StackFrame {
11,248✔
1727
                            immutable: current_immutable,
11,248✔
1728
                            decoder_callback: Some(callback),
11,248✔
1729
                            shareable_index: None,
11,248✔
1730
                            typename,
11,248✔
1731
                            contains_string_namespace: false,
11,248✔
1732
                        },
11,248✔
1733
                    )?;
24✔
1734
                }
1735
                Ok(ContinueFrame(require_immutable)) => {
11,716✔
1736
                    // If require_immutable is true, the next value must be immutable
1737
                    // Otherwise, restore the immutable flag to the previous value
1738
                    current_immutable = if frames.len() >= 2 {
11,716✔
1739
                        frames.get(frames.len() - 2).unwrap().immutable
2,668✔
1740
                    } else {
1741
                        immutable
9,048✔
1742
                    } || require_immutable;
10,292✔
1743
                    frames.last_mut().unwrap().immutable = current_immutable;
11,716✔
1744
                }
1745
                Ok(CompleteFrame(new_value)) => {
5,524✔
1746
                    frames
5,524✔
1747
                        .pop()
5,524✔
1748
                        .expect("received frame completion but there are no frames on the stack");
5,524✔
1749
                    current_immutable = frames.last().map_or(immutable, |frame| frame.immutable);
5,524✔
1750
                    value = Some(new_value);
5,524✔
1751
                }
1752
                Ok(Value(new_value)) => {
12,416✔
1753
                    value = Some(new_value);
12,416✔
1754
                }
12,416✔
1755
                Ok(StringNamespace) => {
1756
                    add_frame(
48✔
1757
                        &mut frames,
48✔
1758
                        self.max_depth,
48✔
1759
                        StackFrame {
48✔
1760
                            immutable: current_immutable,
48✔
1761
                            decoder_callback: None,
48✔
1762
                            shareable_index: None,
48✔
1763
                            typename: DisplayName::String("string namespace"),
48✔
1764
                            contains_string_namespace: true,
48✔
1765
                        },
48✔
1766
                    )?;
×
1767
                    string_namespaces.push(Vec::new());
48✔
1768
                }
1769
                Ok(StringValue(string, length)) => {
3,416✔
1770
                    // Conditionally add the string to the innermost string namespace
1771
                    if let Some(namespace) = string_namespaces.last_mut()
3,416✔
1772
                        && match namespace.len() {
60✔
1773
                            0..24 => length >= 3,
60✔
1774
                            24..256 => length >= 4,
×
1775
                            256..65536 => length >= 5,
×
1776
                            65536..=4294967295 => length >= 6,
×
1777
                            _ => length >= 11,
×
1778
                        }
1779
                    {
60✔
1780
                        namespace.push(string.clone());
60✔
1781
                    }
3,356✔
1782
                    value = Some(string);
3,416✔
1783
                }
1784
                Ok(StringReference(index)) => {
108✔
1785
                    frames
108✔
1786
                        .pop()
108✔
1787
                        .expect("  received string reference but there are no frames on the stack");
108✔
1788
                    if let Some(namespace) = string_namespaces.last() {
108✔
1789
                        if let Some(string) = namespace.get(index) {
96✔
1790
                            value = Some(string.clone());
84✔
1791
                        } else {
84✔
1792
                            return Err(CBORDecodeError::new_err(format!(
12✔
1793
                                "string reference {index} not found"
12✔
1794
                            )));
12✔
1795
                        }
1796
                    } else {
1797
                        return Err(CBORDecodeError::new_err(
12✔
1798
                            "string reference outside of namespace",
12✔
1799
                        ));
12✔
1800
                    }
1801
                    current_immutable = frames
84✔
1802
                        .last()
84✔
1803
                        .map_or(current_immutable, |frame| frame.immutable);
84✔
1804
                }
1805
                Ok(Shareable) => {
1806
                    add_frame(
240✔
1807
                        &mut frames,
240✔
1808
                        self.max_depth,
240✔
1809
                        StackFrame {
240✔
1810
                            immutable: current_immutable,
240✔
1811
                            decoder_callback: None,
240✔
1812
                            shareable_index: Some(shareables.len()),
240✔
1813
                            typename: DisplayName::String("shareable value"),
240✔
1814
                            contains_string_namespace: false,
240✔
1815
                        },
240✔
1816
                    )?;
×
1817
                    shareables.push(None);
240✔
1818
                }
1819
                Ok(SharedReference(index)) => {
180✔
1820
                    frames
180✔
1821
                        .pop()
180✔
1822
                        .expect("received shared reference but there are no frames on the stack");
180✔
1823
                    value = match shareables.get(index) {
180✔
1824
                        Some(Some(value)) => Some(value.clone()),
144✔
1825
                        Some(None) => {
1826
                            return Err(CBORDecodeError::new_err(format!(
12✔
1827
                                "shared value {index} has not been initialized"
12✔
1828
                            )));
12✔
1829
                        }
1830
                        None => {
1831
                            return Err(CBORDecodeError::new_err(format!(
24✔
1832
                                "shared reference {index} not found"
24✔
1833
                            )));
24✔
1834
                        }
1835
                    };
1836
                    current_immutable = frames
144✔
1837
                        .last()
144✔
1838
                        .map_or(current_immutable, |frame| frame.immutable);
144✔
1839
                }
1840
                Err(err) => {
648✔
1841
                    // If an Exception was raised, wrap it in a CBORDecodeError
1842
                    // If a ValueError was raised, wrap it in a CBORDecodeError
1843
                    return if err.is_instance_of::<CBORDecodeError>(py) {
648✔
1844
                        Err(err)
648✔
1845
                    } else if err.is_instance_of::<PyValueError>(py)
×
1846
                        || err.is_instance_of::<PyException>(py)
×
1847
                    {
1848
                        Err(create_exc_from(
×
1849
                            py,
×
1850
                            CBORDecodeError::new_err(err.to_string()),
×
1851
                            Some(err),
×
1852
                        ))
×
1853
                    } else {
1854
                        Err(err)
×
1855
                    };
1856
                }
1857
            }
1858

1859
            if frames.is_empty() {
44,812✔
1860
                // If fp was seekable and excess data has been read, empty the buffer and
1861
                // rewind the file
1862
                if self.available_bytes > 0
3,768✔
1863
                    && let Some(fp) = &self.fp
24✔
1864
                {
1865
                    let offset = -(self.available_bytes as isize);
24✔
1866
                    fp.call_method1(py, intern!(py, "seek"), (offset, SEEK_CUR))?;
24✔
1867
                    self.buffer = None;
24✔
1868
                    self.available_bytes = 0;
24✔
1869
                    self.read_position = 0;
24✔
1870
                }
3,744✔
1871
                return Ok(value.expect("stack is empty but final return value is missing"));
3,768✔
1872
            }
41,044✔
1873
        }
1874
    }
4,524✔
1875
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc