• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

input-output-hk / catalyst-libs / 16165686561

09 Jul 2025 09:30AM UTC coverage: 68.936%. First build
16165686561

Pull #407

github

web-flow
Merge 0394ae809 into cd679dc34
Pull Request #407: feat(rust/cbork): Add deterministically decoding array

205 of 221 new or added lines in 1 file covered. (92.76%)

12938 of 18768 relevant lines covered (68.94%)

2376.72 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.76
/rust/cbork-utils/src/array.rs
1
//! CBOR array (CBOR major type 4) structure with CBOR decoding and encoding
2
//! functionality. Supports deterministically encoded rules (RFC 8949 Section 4.2) if
3
//! corresponding option is enabled.
4

5
use std::{ops::Deref, vec::IntoIter};
6

7
use crate::{
8
    decode_context::DecodeCtx,
9
    decode_helper::get_bytes,
10
    deterministic_helper::{get_cbor_header_size, get_declared_length, CBOR_MAX_TINY_VALUE},
11
};
12

13
/// A decoded CBOR array whose elements are kept as raw CBOR bytes, in the order in
/// which they were decoded.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Array(pub Vec<Vec<u8>>);

/// Gives read-only access to the underlying list of raw element encodings.
impl Deref for Array {
    type Target = Vec<Vec<u8>>;

    fn deref(&self) -> &Self::Target {
        let Self(items) = self;
        items
    }
}

/// Consumes the array and yields each element's raw bytes in decoding order.
impl IntoIterator for Array {
    type IntoIter = IntoIter<Vec<u8>>;
    type Item = Vec<u8>;

    fn into_iter(self) -> Self::IntoIter {
        let Self(items) = self;
        items.into_iter()
    }
}
33

34
/// Major type indicator for CBOR arrays (major type 4: 100 in top 3 bits)
35
/// As per RFC 8949 Section 4.2, arrays in deterministic encoding must:
36
/// - Have lengths encoded minimally (Section 4.2.1)
37
/// - Use definite-length encoding only (Section 4.2.2)
38
/// - Have all elements themselves deterministically encoded
39
const CBOR_MAJOR_TYPE_ARRAY: u8 = 4 << 5;
40

41
/// Initial byte for a CBOR array whose length is encoded as an 8-bit unsigned integer
42
/// (uint8).
43
///
44
/// This value combines the array major type (4) with the additional information value
45
/// (24) that indicates a uint8 length follows. The resulting byte is:
46
/// - High 3 bits: 100 (major type 4 for array)
47
/// - Low 5 bits: 24 (indicates uint8 length follows)
48
///
49
/// Used when encoding CBOR arrays with lengths between 24 and 255 elements.
50
const CBOR_ARRAY_LENGTH_UINT8: u8 = CBOR_MAJOR_TYPE_ARRAY | 24; // For uint8 length encoding
51

52
/// Decodes a CBOR array with deterministic encoding validation (RFC 8949 Section 4.2)
53
/// Returns the raw bytes of the array elements if it passes all deterministic validation
54
/// rules.
55
///
56
/// From RFC 8949 Section 4.2:
57
/// Arrays must follow these deterministic encoding rules:
58
/// - Array lengths must use minimal encoding (Section 4.2.1)
59
/// - Indefinite-length arrays are not allowed (Section 4.2.2)
60
/// - All array elements must themselves be deterministically encoded
61
///
62
/// # Errors
63
///
64
/// Returns `DeterministicError` if:
65
/// - Input is empty (`UnexpectedEof`)
66
/// - Array uses indefinite-length encoding (`IndefiniteLength`)
67
/// - Array length is not encoded minimally (`NonMinimalInt`)
68
/// - Array element decoding fails (`DecoderError`)
69
/// - Array elements are not deterministically encoded
70
impl minicbor::Decode<'_, DecodeCtx> for Array {
71
    fn decode(
12✔
72
        d: &mut minicbor::Decoder<'_>, ctx: &mut DecodeCtx,
12✔
73
    ) -> Result<Self, minicbor::decode::Error> {
12✔
74
        // Capture position before reading the array header
12✔
75
        let header_start_pos = d.position();
12✔
76

77
        // Handle both definite and indefinite-length arrays
78
        let array_len = d.array()?;
12✔
79

80
        if let Some(length) = array_len {
12✔
81
            // Definite-length array
82
            if matches!(ctx, DecodeCtx::Deterministic) {
10✔
83
                ctx.try_check(|| check_array_minimal_length(d, header_start_pos, length))?;
9✔
84
            }
1✔
85

86
            let elements = decode_array_elements(d, length, ctx)?;
9✔
87
            Ok(Self(elements))
9✔
88
        } else {
89
            // Indefinite-length array
90
            if matches!(ctx, DecodeCtx::Deterministic) {
2✔
91
                return Err(minicbor::decode::Error::message(
1✔
92
                    "Indefinite-length items must be made definite-length items",
1✔
93
                ));
1✔
94
            }
1✔
95

1✔
96
            // In non-deterministic mode, accept indefinite-length arrays
1✔
97
            // minicbor should handle the indefinite-length decoding for us
1✔
98
            // We'll use Vec<minicbor::data::Type> to decode heterogeneous elements
1✔
99
            let mut elements = Vec::new();
1✔
100

101
            // Since we can't easily determine when indefinite arrays end,
102
            // we'll need to work with the raw bytes approach
103
            let Some(remaining_input) = &d.input().get(d.position()..) else {
1✔
NEW
104
                return Err(minicbor::decode::Error::message("Invalid slicing position"));
×
105
            };
106
            let mut temp_decoder = minicbor::Decoder::new(remaining_input);
1✔
107

108
            // Decode elements until we hit the break marker (0xFF)
109
            while temp_decoder.position() < temp_decoder.input().len() {
3✔
110
                // Check if we've hit the break marker
111
                if temp_decoder.input().get(temp_decoder.position()) == Some(&0xFF) {
3✔
112
                    // Skip the break marker
113
                    temp_decoder.skip().ok();
1✔
114
                    break;
1✔
115
                }
2✔
116

2✔
117
                let element_start = temp_decoder.position();
2✔
118
                if temp_decoder.skip().is_err() {
2✔
NEW
119
                    break;
×
120
                }
2✔
121
                let element_end = temp_decoder.position();
2✔
122

2✔
123
                if element_end > element_start {
2✔
124
                    let Some(element_bytes) = temp_decoder.input().get(element_start..element_end)
2✔
125
                    else {
NEW
126
                        return Err(minicbor::decode::Error::message("Invalid slicing position"));
×
127
                    };
128
                    elements.push(element_bytes.to_vec());
2✔
NEW
129
                }
×
130
            }
131

132
            let Some(next_pos) = d.position().checked_add(temp_decoder.position()) else {
1✔
NEW
133
                return Err(minicbor::decode::Error::message(
×
NEW
134
                    "Addition of next positions overflowed",
×
NEW
135
                ));
×
136
            };
137

138
            // Update the main decoder position
139
            d.set_position(next_pos);
1✔
140

1✔
141
            Ok(Self(elements))
1✔
142
        }
143
    }
12✔
144
}
145

146
/// Validates that a CBOR array's length is encoded using the minimal number of bytes as
147
/// required by RFC 8949's deterministic encoding rules.
148
///
149
/// According to the deterministic encoding requirements:
150
/// - The length of an array MUST be encoded using the smallest possible CBOR additional
151
///   information value
152
/// - For values 0 through 23, the additional info byte is used directly
153
/// - For values that fit in 8, 16, 32, or 64 bits, the appropriate multi-byte encoding
154
///   must be used
155
///
156
/// # Specification Reference
157
/// This implementation follows RFC 8949 Section 4.2.1 which requires that:
158
/// "The length of arrays, maps, and strings MUST be encoded using the smallest possible
159
/// CBOR additional information value."
160
fn check_array_minimal_length(
9✔
161
    decoder: &minicbor::Decoder, header_start_pos: usize, value: u64,
9✔
162
) -> Result<(), minicbor::decode::Error> {
9✔
163
    // For zero length, 0x80 is always the minimal encoding
9✔
164
    if value == 0 {
9✔
165
        return Ok(());
1✔
166
    }
8✔
167

168
    let initial_byte = decoder
8✔
169
        .input()
8✔
170
        .get(header_start_pos)
8✔
171
        .copied()
8✔
172
        .ok_or_else(|| {
8✔
NEW
173
            minicbor::decode::Error::message("Cannot read initial byte for minimality check")
×
174
        })?;
8✔
175

176
    // Only check minimality for array length encodings using uint8
177
    // Immediate values (0-23) are already minimal by definition
178
    if initial_byte == CBOR_ARRAY_LENGTH_UINT8 && value <= CBOR_MAX_TINY_VALUE {
8✔
179
        return Err(minicbor::decode::Error::message(
1✔
180
            "array minimal length failure",
1✔
181
        ));
1✔
182
    }
7✔
183

7✔
184
    Ok(())
7✔
185
}
9✔
186

187
/// Decodes all elements in the array
188
fn decode_array_elements(
9✔
189
    d: &mut minicbor::Decoder, length: u64, ctx: &mut DecodeCtx,
9✔
190
) -> Result<Vec<Vec<u8>>, minicbor::decode::Error> {
9✔
191
    let capacity = usize::try_from(length).map_err(|_| {
9✔
NEW
192
        minicbor::decode::Error::message("Array length too large for current platform")
×
193
    })?;
9✔
194
    let mut elements = Vec::with_capacity(capacity);
9✔
195

9✔
196
    // Decode each array element
9✔
197
    for _ in 0..length {
9✔
198
        // Record the starting position of the element
199
        let element_start = d.position();
18✔
200

18✔
201
        // Skip over the element to find its end position
18✔
202
        d.skip()?;
18✔
203
        let element_end = d.position();
18✔
204

205
        // The elements themselves must be deterministically encoded (4.2.1)
206
        let element_bytes = get_bytes(d, element_start, element_end)?.to_vec();
18✔
207

208
        // Only check deterministic encoding in deterministic mode
209
        if matches!(ctx, DecodeCtx::Deterministic) {
18✔
210
            ctx.try_check(|| array_elements_are_deterministic(&element_bytes))?;
17✔
211
        }
1✔
212

213
        elements.push(element_bytes);
18✔
214
    }
215

216
    Ok(elements)
9✔
217
}
9✔
218

219
/// Validates that a CBOR array element follows the deterministic encoding rules as
220
/// specified in RFC 8949. In this case, it validates that the elements themselves must be
221
/// deterministically encoded (4.2.1).
222
fn array_elements_are_deterministic(element_bytes: &[u8]) -> Result<(), minicbor::decode::Error> {
17✔
223
    // if the array elements are not a txt string or byte string we cannot get a declared
224
    // length
225
    if let Some(element_declared_length) = get_declared_length(element_bytes)? {
17✔
226
        let header_size = get_cbor_header_size(element_bytes)?;
13✔
227
        let actual_content_size =
13✔
228
            element_bytes
13✔
229
                .len()
13✔
230
                .checked_sub(header_size)
13✔
231
                .ok_or_else(|| {
13✔
NEW
232
                    minicbor::decode::Error::message("Integer overflow in content size calculation")
×
233
                })?;
13✔
234

235
        if element_declared_length != actual_content_size {
13✔
NEW
236
            return Err(minicbor::decode::Error::message(
×
NEW
237
                "Declared length does not match the actual length. Non deterministic array element.",
×
NEW
238
            ));
×
239
        }
13✔
240
    }
4✔
241
    Ok(())
17✔
242
}
17✔
243

244
#[cfg(test)]
245
mod tests {
246
    use minicbor::{Decode, Decoder};
247

248
    use super::*;
249

250
    /// Ensures that encoding and decoding an array preserves:
251
    /// - The exact byte representation of elements
252
    /// - The definite length encoding format
253
    /// - The order of elements
254
    #[test]
255
    fn test_array_bytes_roundtrip() {
1✔
256
        // Create a valid deterministic array encoding
1✔
257
        let mut decoder = Decoder::new(&[
1✔
258
            0x82, // 2 elements
1✔
259
            0x41, 0x01, // h'01'
1✔
260
            0x42, 0x01, 0x02, // h'0102'
1✔
261
        ]);
1✔
262
        let result = Array::decode(&mut decoder, &mut DecodeCtx::Deterministic).unwrap();
1✔
263

1✔
264
        // Verify we got back exactly the same bytes
1✔
265
        assert_eq!(
1✔
266
            result,
1✔
267
            Array(vec![
1✔
268
                vec![0x41, 0x01],       // h'01'
1✔
269
                vec![0x42, 0x01, 0x02], // h'0102'
1✔
270
            ])
1✔
271
        );
1✔
272
    }
1✔
273

274
    /// Test empty array handling - special case mentioned in RFC 8949.
275
    /// An empty array is valid and must still follow length encoding rules
276
    /// from Section 4.2.1.
277
    #[test]
278
    fn test_empty_array() {
1✔
279
        let mut decoder = Decoder::new(&[
1✔
280
            0x80, // Array with 0 elements - encoded with immediate value as per Section 4.2.1
1✔
281
        ]);
1✔
282
        assert!(Array::decode(&mut decoder, &mut DecodeCtx::Deterministic).is_ok());
1✔
283
    }
1✔
284

285
    /// Test minimal length encoding rules for arrays as specified in RFC 8949 Section
286
    /// 4.2.1
287
    ///
288
    /// Paraphrasing RFC 8949 Section 4.2.1 (not a verbatim quote):
289
    /// preferred serialization is required, so length arguments must be as short as
290
    /// possible. For arrays (major type 4), lengths 0-23 must be encoded in the
291
    /// initial byte.
292
    #[test]
293
    fn test_array_minimal_length_encoding() {
1✔
294
        // Test case 1: Valid minimal encoding (length = 1)
1✔
295
        let mut decoder = Decoder::new(&[
1✔
296
            0x81, // Array, length 1 (major type 4 with immediate value 1)
1✔
297
            0x01, // Element: unsigned int 1
1✔
298
        ]);
1✔
299
        assert!(Array::decode(&mut decoder, &mut DecodeCtx::Deterministic).is_ok());
1✔
300

301
        // Test case 2: Invalid non-minimal encoding (using additional info 24 for length 1)
302
        let mut decoder = Decoder::new(&[
1✔
303
            0x98, // Array with additional info = 24 (0x80 | 0x18)
1✔
304
            0x01, // Length encoded as uint8 = 1
1✔
305
            0x01, // Element: unsigned int 1
1✔
306
        ]);
1✔
307
        assert!(Array::decode(&mut decoder.clone(), &mut DecodeCtx::Deterministic).is_err());
1✔
308
        assert!(Array::decode(&mut decoder, &mut DecodeCtx::non_deterministic()).is_ok());
1✔
309
    }
1✔
310

311
    /// Test handling of complex element structures while maintaining deterministic
312
    /// encoding
313
    ///
314
    /// RFC 8949 Section 4.2 requires that all elements be deterministically encoded:
315
    /// "All contained items must also follow the same rules."
316
    #[test]
317
    fn test_array_complex_elements() {
1✔
318
        let mut decoder = Decoder::new(&[
1✔
319
            0x84, // Array with 4 elements
1✔
320
            0x41, 0x01, // Element 1: simple 1-byte string
1✔
321
            0x42, 0x01, 0x02, // Element 2: 2-byte string
1✔
322
            0x62, 0x68, 0x69, // Element 3: "hi"
1✔
323
            0xF9, 0x00, 0x00, // Element 4: float 0.0 half-precision canonical encoding
1✔
324
        ]);
1✔
325
        assert!(Array::decode(&mut decoder, &mut DecodeCtx::Deterministic).is_ok());
1✔
326
    }
1✔
327

328
    /// Test edge cases for array encoding while maintaining compliance with RFC 8949
329
    ///
330
    /// These cases test boundary conditions that must still follow all rules from
331
    /// Section 4.2:
332
    /// - Minimal length encoding (4.2.1)
333
    /// - No indefinite lengths (4.2.2)
334
    /// - Deterministic element encoding
335
    #[test]
336
    fn test_array_edge_cases() {
1✔
337
        // Single element array - must still follow minimal length encoding rules
1✔
338
        let mut decoder = Decoder::new(&[
1✔
339
            0x81, // Array with 1 element (using immediate value as per Section 4.2.1)
1✔
340
            0x41, 0x01, // Element: 1-byte string
1✔
341
        ]);
1✔
342
        assert!(Array::decode(&mut decoder, &mut DecodeCtx::Deterministic).is_ok());
1✔
343

344
        // Array with zero-length string element - tests smallest possible element case
345
        let mut decoder = Decoder::new(&[
1✔
346
            0x81, // Array with 1 element
1✔
347
            0x40, // Element: 0-byte string (smallest possible element)
1✔
348
        ]);
1✔
349
        assert!(Array::decode(&mut decoder, &mut DecodeCtx::Deterministic).is_ok());
1✔
350
    }
1✔
351

352
    /// Test array with multiple elements of different types
353
    #[test]
354
    fn test_array_mixed_elements() {
1✔
355
        // Array with integer, string, and nested array elements
1✔
356
        let mut decoder = Decoder::new(&[
1✔
357
            0x83, // Array with 3 elements
1✔
358
            0x01, // Element 1: unsigned int 1
1✔
359
            0x41, 0x48, // Element 2: 1-byte string "H"
1✔
360
            0x81, 0x02, // Element 3: nested array with one element (unsigned int 2)
1✔
361
        ]);
1✔
362
        assert!(Array::decode(&mut decoder, &mut DecodeCtx::Deterministic).is_ok());
1✔
363
    }
1✔
364

365
    /// Test array with multiple elements
366
    #[allow(clippy::indexing_slicing)]
367
    #[test]
368
    fn test_array_larger_size() {
1✔
369
        // Test with a simple array of 5 single-byte strings
1✔
370
        let mut decoder = Decoder::new(&[
1✔
371
            0x85, // Array with 5 elements
1✔
372
            0x41, 0x01, // Element 1: 1-byte string with value 0x01
1✔
373
            0x41, 0x02, // Element 2: 1-byte string with value 0x02
1✔
374
            0x41, 0x03, // Element 3: 1-byte string with value 0x03
1✔
375
            0x41, 0x04, // Element 4: 1-byte string with value 0x04
1✔
376
            0x41, 0x05, // Element 5: 1-byte string with value 0x05
1✔
377
        ]);
1✔
378
        let result = Array::decode(&mut decoder, &mut DecodeCtx::Deterministic);
1✔
379
        assert!(result.is_ok());
1✔
380

381
        let array = result.unwrap();
1✔
382
        assert_eq!(array.len(), 5);
1✔
383

384
        // Verify the elements are correctly decoded
385
        assert_eq!(array[0], vec![0x41, 0x01]);
1✔
386
        assert_eq!(array[1], vec![0x41, 0x02]);
1✔
387
        assert_eq!(array[2], vec![0x41, 0x03]);
1✔
388
        assert_eq!(array[3], vec![0x41, 0x04]);
1✔
389
        assert_eq!(array[4], vec![0x41, 0x05]);
1✔
390
    }
1✔
391

392
    /// Test indefinite-length array rejection in deterministic mode
393
    /// and acceptance in non-deterministic mode
394
    #[test]
395
    fn test_indefinite_length_array_rejection() {
1✔
396
        // Indefinite-length array (not allowed in deterministic encoding)
1✔
397
        let mut decoder = Decoder::new(&[
1✔
398
            0x9F, // Array with indefinite length
1✔
399
            0x01, // Element 1
1✔
400
            0x02, // Element 2
1✔
401
            0xFF, // Break code
1✔
402
        ]);
1✔
403
        assert!(Array::decode(&mut decoder.clone(), &mut DecodeCtx::Deterministic).is_err());
1✔
404
        // Should work in non-deterministic mode
405
        assert!(Array::decode(&mut decoder, &mut DecodeCtx::non_deterministic()).is_ok());
1✔
406
    }
1✔
407
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc