• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 9014530096

08 May 2024 07:27PM UTC coverage: 76.402% (+0.2%) from 76.234%
9014530096

push

github

web-flow
Add missing std pointer-like impls for DataProvider, DynamicDataProvider (#4880)

0 of 3 new or added lines in 1 file covered. (0.0%)

3218 existing lines in 167 files now uncovered.

53328 of 69799 relevant lines covered (76.4%)

504343.42 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/utils/resb/src/binary/deserializer.rs
1
// This file is part of ICU4X. For terms of use, please see the file
×
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
use core::fmt;
6

7
use crate::MASK_28_BIT;
8

9
use super::{
10
    get_subslice, header::BinHeader, read_u16, BinIndex, BinaryDeserializerError, CharsetFamily,
11
    FormatVersion, ResDescriptor, ResourceReprType,
12
};
13

14
extern crate alloc;
15
use alloc::string::String;
16

17
use serde::{de, forward_to_deserialize_any, Deserialize};
18

19
/// The character set family of the current system.
20
///
21
/// Systems using EBCDIC are not supported at this time.
22
const SYSTEM_CHARSET_FAMILY: CharsetFamily = CharsetFamily::Ascii;
23

24
/// Deserializes an instance of type `T` from bytes representing a binary ICU
25
/// resource bundle.
26
pub fn from_bytes<'a, T>(input: &'a [u8]) -> Result<T, BinaryDeserializerError>
27
where
28
    T: Deserialize<'a>,
29
{
30
    let mut deserializer = ResourceTreeDeserializer::from_bytes(input)?;
31
    let t = T::deserialize(&mut deserializer)?;
32

33
    Ok(t)
34
}
35

36
/// The `ResourceTreeDeserializer` struct processes an ICU binary resource
37
/// bundle by walking the resource tree (as represented by [`ResDescriptor`]s).
38
struct ResourceTreeDeserializer<'de> {
39
    /// The current position in the input, represented as a slice beginning at
40
    /// the next byte to be read and ending at the end of input.
41
    ///
42
    /// As an invariant of the deserializer, `input` should always begin
43
    /// immediately before a resource descriptor.
44
    input: &'de [u8],
45

46
    /// The format version of the input.
47
    ///
48
    /// This is currently unused, but support for other format versions will be
49
    /// incorporated in later versions.
50
    _format_version: FormatVersion,
51

52
    /// The 16-bit data block, represented as a slice beginning at the start of
53
    /// the block. This is `None` in format versions below 2.0.
54
    data_16_bit: Option<&'de [u8]>,
55

56
    /// The keys block, represented as a slice beginning at the start of the
57
    /// block.
58
    keys: &'de [u8],
59

60
    /// The input body, represented as a slice beginning at the start of the
61
    /// block.
62
    body: &'de [u8],
63
}
64

65
impl<'de> ResourceTreeDeserializer<'de> {
66
    /// Creates a new deserializer from the header and index of the resource
67
    /// bundle.
68
    fn from_bytes(input: &'de [u8]) -> Result<Self, BinaryDeserializerError> {
×
69
        let header = BinHeader::try_from(input)?;
×
70

71
        // Verify that the representation in the resource bundle is one we're
72
        // prepared to read.
73
        if header.repr_info.charset_family != SYSTEM_CHARSET_FAMILY {
×
74
            return Err(BinaryDeserializerError::unsupported_format(
×
75
                "bundle and system character set families do not match",
76
            ));
77
        }
78

79
        if header.repr_info.size_of_char != 2 {
×
80
            return Err(BinaryDeserializerError::unsupported_format(
×
81
                "characters of size other than 2 are not supported",
82
            ));
83
        }
84

85
        if header.repr_info.format_version != FormatVersion::V2_0 {
×
86
            // Support for other versions can be added at a later time, but for
87
            // now we can only deal with 2.0.
88
            return Err(BinaryDeserializerError::unsupported_format(
×
89
                "format versions other than 2.0 are not supported at this time",
90
            ));
91
        }
92

93
        let body = get_subslice(input, header.size as usize..)?;
×
94

95
        // Skip the root resource descriptor and get the index area.
96
        let index = get_subslice(body, core::mem::size_of::<u32>()..)?;
×
97
        let index = BinIndex::try_from(index)?;
×
98

99
        // Keys begin at the start of the body.
100
        let keys = get_subslice(
×
101
            body,
102
            ..(index.keys_end as usize) * core::mem::size_of::<u32>(),
×
103
        )?;
×
104

105
        let data_16_bit = if header.repr_info.format_version < FormatVersion::V2_0 {
×
106
            // The 16-bit data area was not introduced until format version 2.0.
107
            None
×
108
        } else if let Some(data_16_bit_end) = index.data_16_bit_end {
×
109
            let data_16_bit = get_subslice(
×
110
                body,
111
                (index.keys_end as usize) * core::mem::size_of::<u32>()
×
112
                    ..(data_16_bit_end as usize) * core::mem::size_of::<u32>(),
×
113
            )?;
×
114
            Some(data_16_bit)
×
115
        } else {
116
            return Err(BinaryDeserializerError::invalid_data(
×
117
                "offset to the end of 16-bit data not specified",
118
            ));
119
        };
120

121
        Ok(Self {
×
122
            input: body,
123
            _format_version: header.repr_info.format_version,
×
124
            data_16_bit,
×
125
            keys,
126
            body,
127
        })
128
    }
×
129

130
    /// Reads the next resource descriptor without updating the input position.
131
    fn peek_next_resource_descriptor(&self) -> Result<ResDescriptor, BinaryDeserializerError> {
×
132
        let input = get_subslice(self.input, 0..4)?;
×
133
        let descriptor = match input.try_into() {
×
134
            Ok(value) => value,
×
135
            Err(_) => {
136
                return Err(BinaryDeserializerError::invalid_data(
×
137
                    "unable to read resource descriptor",
138
                ))
139
            }
140
        };
141
        let descriptor = u32::from_le_bytes(descriptor);
×
142

143
        ResDescriptor::try_from(descriptor)
×
144
    }
×
145

146
    /// Reads the next resource descriptor.
147
    fn get_next_resource_descriptor(&mut self) -> Result<ResDescriptor, BinaryDeserializerError> {
×
148
        let result = self.peek_next_resource_descriptor();
×
149

150
        // Pop resource descriptor from input.
151
        self.input = get_subslice(self.input, core::mem::size_of::<u32>()..)?;
×
152

153
        result
×
154
    }
×
155

156
    /// Reads a 28-bit integer resource as a signed value.
157
    fn parse_signed(&mut self) -> Result<i32, BinaryDeserializerError> {
×
158
        let descriptor = self.get_next_resource_descriptor()?;
×
159
        match descriptor.resource_type() {
×
160
            // Since integers in the resource bundle are 28-bit, we need to
161
            // shift left and shift back right in order to get sign extension.
162
            // Per https://doc.rust-lang.org/reference/expressions/operator-expr.html#arithmetic-and-logical-binary-operators,
163
            // `>>` is arithmetic right shift on signed ints, so it gives us the
164
            // desired behavior.
165
            ResourceReprType::Int => Ok(descriptor.value_as_signed_int()),
×
166
            _ => Err(BinaryDeserializerError::resource_type_mismatch(
×
167
                "expected integer resource",
168
            )),
×
169
        }
170
    }
×
171

172
    /// Reads a 28-bit integer resource as an unsigned value.
173
    fn parse_unsigned(&mut self) -> Result<u32, BinaryDeserializerError> {
×
174
        let descriptor = self.get_next_resource_descriptor()?;
×
175
        match descriptor.resource_type() {
×
176
            ResourceReprType::Int => Ok(descriptor.value_as_unsigned_int()),
×
177
            _ => Err(BinaryDeserializerError::resource_type_mismatch(
×
178
                "expected integer resource",
179
            )),
×
180
        }
181
    }
×
182
}
183

184
impl<'de, 'a> de::Deserializer<'de> for &'a mut ResourceTreeDeserializer<'de> {
185
    type Error = BinaryDeserializerError;
186

187
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
188
    where
189
        V: de::Visitor<'de>,
190
    {
191
        let descriptor = self.peek_next_resource_descriptor()?;
192
        match descriptor.resource_type() {
193
            ResourceReprType::_String | ResourceReprType::StringV2 => {
194
                self.deserialize_string(visitor)
195
            }
196
            ResourceReprType::Binary => self.deserialize_bytes(visitor),
197
            ResourceReprType::Table | ResourceReprType::Table16 | ResourceReprType::_Table32 => {
198
                self.deserialize_map(visitor)
199
            }
200
            ResourceReprType::_Alias => todo!(),
201
            ResourceReprType::Int => self.deserialize_u32(visitor),
202
            ResourceReprType::Array | ResourceReprType::Array16 | ResourceReprType::IntVector => {
203
                self.deserialize_seq(visitor)
204
            }
205
        }
206
    }
207

208
    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
209
    where
210
        V: de::Visitor<'de>,
211
    {
212
        let value = self.parse_unsigned()?;
213
        let value = match value {
214
            0 => false,
215
            1 => true,
216
            _ => {
217
                return Err(BinaryDeserializerError::resource_type_mismatch(
218
                    "expected integer resource representable as boolean",
219
                ))
220
            }
221
        };
222

223
        visitor.visit_bool(value)
224
    }
225

226
    fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value, Self::Error>
227
    where
228
        V: de::Visitor<'de>,
229
    {
230
        self.deserialize_i32(visitor)
231
    }
232

233
    fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value, Self::Error>
234
    where
235
        V: de::Visitor<'de>,
236
    {
237
        self.deserialize_i32(visitor)
238
    }
239

240
    fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
241
    where
242
        V: de::Visitor<'de>,
243
    {
244
        visitor.visit_i32(self.parse_signed()?)
245
    }
246

247
    fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
248
    where
249
        V: de::Visitor<'de>,
250
    {
251
        visitor.visit_i64(self.parse_signed()? as i64)
252
    }
253

254
    fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value, Self::Error>
255
    where
256
        V: de::Visitor<'de>,
257
    {
258
        self.deserialize_u32(visitor)
259
    }
260

261
    fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value, Self::Error>
262
    where
263
        V: de::Visitor<'de>,
264
    {
265
        self.deserialize_u32(visitor)
266
    }
267

268
    fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
269
    where
270
        V: de::Visitor<'de>,
271
    {
272
        visitor.visit_u32(self.parse_unsigned()?)
273
    }
274

275
    fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
276
    where
277
        V: de::Visitor<'de>,
278
    {
279
        visitor.visit_u64(self.parse_unsigned()? as u64)
280
    }
281

282
    fn deserialize_f32<V>(self, _visitor: V) -> Result<V::Value, Self::Error>
283
    where
284
        V: de::Visitor<'de>,
285
    {
286
        // Resource bundles have no native concept of floating point numbers and
287
        // no examples of storing them have been encountered.
288
        unimplemented!()
289
    }
290

291
    fn deserialize_f64<V>(self, _visitor: V) -> Result<V::Value, Self::Error>
292
    where
293
        V: de::Visitor<'de>,
294
    {
295
        // Resource bundles have no native concept of floating point numbers and
296
        // no examples of storing them have been encountered.
297
        unimplemented!()
298
    }
299

300
    fn deserialize_char<V>(self, _visitor: V) -> Result<V::Value, Self::Error>
301
    where
302
        V: de::Visitor<'de>,
303
    {
304
        // Resource bundles have no native concept of single characters and no
305
        // examples of storing them have been encountered.
306
        unimplemented!()
307
    }
308

309
    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
310
    where
311
        V: de::Visitor<'de>,
312
    {
313
        // Strings in resource bundles are stored as UTF-16 and can't be
314
        // borrowed.
315
        self.deserialize_string(visitor)
316
    }
317

318
    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
319
    where
320
        V: de::Visitor<'de>,
321
    {
322
        let descriptor = self.get_next_resource_descriptor()?;
323
        match descriptor.resource_type() {
324
            ResourceReprType::_String => todo!(),
325
            ResourceReprType::StringV2 => {
326
                if let Some(data_16_bit) = self.data_16_bit {
327
                    if descriptor.is_empty() {
328
                        return visitor.visit_str("");
329
                    }
330

331
                    let input = get_subslice(data_16_bit, descriptor.value_as_16_bit_offset()..)?;
332
                    let de = Resource16BitDeserializer::new(input);
333
                    de.deserialize_string(visitor)
334
                } else {
335
                    Err(BinaryDeserializerError::invalid_data(
336
                        "StringV2 resource without 16-bit data block",
337
                    ))
338
                }
339
            }
340
            _ => Err(BinaryDeserializerError::resource_type_mismatch(
341
                "expected string resource",
342
            )),
343
        }
344
    }
345

346
    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
347
    where
348
        V: de::Visitor<'de>,
349
    {
350
        let descriptor = self.get_next_resource_descriptor()?;
351
        let value = match descriptor.resource_type() {
352
            ResourceReprType::Binary => {
353
                // Binary resources are, by definition, a sequence of arbitrary
354
                // bytes and can be borrowed as such.
355
                if descriptor.is_empty() {
356
                    // Handle empty descriptors per-type so we don't miss a type
357
                    // mismatch.
358
                    return visitor.visit_borrowed_bytes(&[]);
359
                }
360

361
                let input = get_subslice(self.body, descriptor.value_as_32_bit_offset()..)?;
362
                let (length, input) = read_u32(input)?;
363

364
                get_subslice(input, 0..length as usize)?
365
            }
366
            ResourceReprType::IntVector => {
367
                // Int vector resources are stored as a sequence of 32-bit
368
                // integers in the bundle's native endian. For zero-copy, it may
369
                // be desirable to simply borrow as bytes.
370
                if descriptor.is_empty() {
371
                    // Handle empty descriptors per-type so we don't miss a type
372
                    // mismatch.
373
                    return visitor.visit_borrowed_bytes(&[]);
374
                }
375

376
                let input = get_subslice(self.body, descriptor.value_as_32_bit_offset()..)?;
377
                let (length, input) = read_u32(input)?;
378

379
                get_subslice(input, ..(length as usize) * core::mem::size_of::<u32>())?
380
            }
381
            ResourceReprType::StringV2 => {
382
                // String resources are stored as UTF-16 strings in the bundle's
383
                // native endian. In situations where treatment as strings may
384
                // not be needed or performance would benefit from lazy
385
                // interpretation, allow for zero-copy.
386
                if let Some(data_16_bit) = self.data_16_bit {
387
                    if descriptor.is_empty() {
388
                        // Handle empty descriptors per-type so we don't miss a
389
                        // type mismatch.
390
                        return visitor.visit_borrowed_bytes(&[]);
391
                    }
392

393
                    let input = get_subslice(data_16_bit, descriptor.value_as_16_bit_offset()..)?;
394
                    let (length, input) = get_length_and_start_of_utf16_string(input)?;
395
                    get_subslice(input, ..length * core::mem::size_of::<u16>())?
396
                } else {
397
                    return Err(BinaryDeserializerError::invalid_data(
398
                        "StringV2 resource without 16-bit data block",
399
                    ));
400
                }
401
            }
402
            _ => {
403
                return Err(BinaryDeserializerError::resource_type_mismatch(
404
                    "expected binary data resource",
405
                ))
406
            }
407
        };
408

409
        visitor.visit_borrowed_bytes(value)
410
    }
411

412
    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
413
    where
414
        V: de::Visitor<'de>,
415
    {
416
        self.deserialize_bytes(visitor)
417
    }
418

419
    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
420
    where
421
        V: de::Visitor<'de>,
422
    {
423
        visitor.visit_some(self)
424
    }
425

426
    fn deserialize_unit<V>(self, _visitor: V) -> Result<V::Value, Self::Error>
427
    where
428
        V: de::Visitor<'de>,
429
    {
430
        // There's no concept of `null` or any other unit type in resource
431
        // bundles.
432
        unimplemented!()
433
    }
434

435
    fn deserialize_unit_struct<V>(
436
        self,
437
        _name: &'static str,
438
        _visitor: V,
439
    ) -> Result<V::Value, Self::Error>
440
    where
441
        V: de::Visitor<'de>,
442
    {
443
        // There's no concept of `null` or any other unit type in resource
444
        // bundles.
445
        unimplemented!()
446
    }
447

448
    fn deserialize_newtype_struct<V>(
449
        self,
450
        _name: &'static str,
451
        visitor: V,
452
    ) -> Result<V::Value, Self::Error>
453
    where
454
        V: de::Visitor<'de>,
455
    {
456
        // Resource bundles have no concept of newtypes, so just pass through
457
        // and let the visitor ask for what it expects.
458
        visitor.visit_newtype_struct(self)
459
    }
460

461
    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
462
    where
463
        V: de::Visitor<'de>,
464
    {
465
        let descriptor = self.get_next_resource_descriptor()?;
466
        match descriptor.resource_type() {
467
            ResourceReprType::Array => {
468
                if descriptor.is_empty() {
469
                    // Handle empty descriptors per-type so we don't miss a type
470
                    // mismatch.
471
                    return visitor.visit_seq(EmptySeqAccess);
472
                }
473

474
                let input = get_subslice(self.body, descriptor.value_as_32_bit_offset()..)?;
475
                let (length, offsets) = read_u32(input)?;
476

477
                visitor.visit_seq(ArraySeqAccess {
478
                    de: self,
479
                    descriptors: offsets,
480
                    remaining: length as usize,
481
                })
482
            }
483
            ResourceReprType::Array16 => {
484
                if descriptor.is_empty() {
485
                    // Handle empty descriptors per-type so we don't miss a type
486
                    // mismatch.
487
                    return visitor.visit_seq(EmptySeqAccess);
488
                }
489

490
                if let Some(data_16_bit) = self.data_16_bit {
491
                    let input = get_subslice(data_16_bit, descriptor.value_as_16_bit_offset()..)?;
492
                    let (length, offsets) = read_u16(input)?;
493

494
                    let result = visitor.visit_seq(Array16SeqAccess {
495
                        data_16_bit,
496
                        offsets,
497
                        remaining: length as usize,
498
                    });
499

500
                    result
501
                } else {
502
                    Err(BinaryDeserializerError::invalid_data(
503
                        "StringV2 resource with no 16-bit data",
504
                    ))
505
                }
506
            }
507
            ResourceReprType::IntVector => {
508
                if descriptor.is_empty() {
509
                    // Handle empty descriptors per-type so we don't miss a type
510
                    // mismatch.
511
                    return visitor.visit_seq(EmptySeqAccess);
512
                }
513

514
                let input = get_subslice(self.body, descriptor.value_as_32_bit_offset()..)?;
515
                let (length, values) = read_u32(input)?;
516

517
                let result = visitor.visit_seq(IntVectorSeqAccess {
518
                    values,
519
                    remaining: length as usize,
520
                });
521

522
                result
523
            }
524
            _ => Err(BinaryDeserializerError::resource_type_mismatch(
525
                "expected array resource",
526
            )),
527
        }
528
    }
529

530
    fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, Self::Error>
531
    where
532
        V: de::Visitor<'de>,
533
    {
534
        // We copy `serde_json` in handling tuples as sequences.
535
        self.deserialize_seq(visitor)
536
    }
537

538
    fn deserialize_tuple_struct<V>(
539
        self,
540
        _name: &'static str,
541
        _len: usize,
542
        visitor: V,
543
    ) -> Result<V::Value, Self::Error>
544
    where
545
        V: de::Visitor<'de>,
546
    {
547
        // We copy `serde_json` in handling tuples as sequences.
548
        self.deserialize_seq(visitor)
549
    }
550

551
    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
552
    where
553
        V: de::Visitor<'de>,
554
    {
555
        let descriptor = self.get_next_resource_descriptor()?;
556
        match descriptor.resource_type() {
557
            ResourceReprType::Table => {
558
                if descriptor.is_empty() {
559
                    // Handle empty descriptors per-type so we don't miss a type
560
                    // mismatch.
561
                    return visitor.visit_map(EmptyMapAccess);
562
                }
563

564
                let input = get_subslice(self.body, descriptor.value_as_32_bit_offset()..)?;
565
                let (length, keys) = read_u16(input)?;
566

567
                // Most values in the file are 32-bit aligned, so sequences of
568
                // 16-bit values may be padded.
569
                let length_with_padding = (length + ((length + 1) % 2)) as usize;
570

571
                let values_offset = length_with_padding * core::mem::size_of::<u16>();
572
                let values = get_subslice(keys, values_offset..)?;
573

574
                visitor.visit_map(TableMapAccess {
575
                    de: self,
576
                    keys,
577
                    values,
578
                    remaining: length as usize,
579
                })
580
            }
581
            ResourceReprType::_Table32 => todo!(),
582
            ResourceReprType::Table16 => todo!(),
583
            _ => Err(BinaryDeserializerError::resource_type_mismatch(
584
                "expected table resource",
585
            )),
586
        }
587
    }
588

589
    fn deserialize_struct<V>(
590
        self,
591
        _name: &'static str,
592
        _fields: &'static [&'static str],
593
        visitor: V,
594
    ) -> Result<V::Value, Self::Error>
595
    where
596
        V: de::Visitor<'de>,
597
    {
598
        self.deserialize_map(visitor)
599
    }
600

601
    fn deserialize_enum<V>(
602
        self,
603
        _name: &'static str,
604
        _variants: &'static [&'static str],
605
        _visitor: V,
606
    ) -> Result<V::Value, Self::Error>
607
    where
608
        V: de::Visitor<'de>,
609
    {
610
        // Resource bundles have no concept of an enum and it's unclear how to
611
        // handle untagged heterogeneous values.
612
        todo!()
613
    }
614

615
    fn deserialize_identifier<V>(self, _visitor: V) -> Result<V::Value, Self::Error>
616
    where
617
        V: de::Visitor<'de>,
618
    {
619
        unimplemented!()
620
    }
621

622
    fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
623
    where
624
        V: de::Visitor<'de>,
625
    {
626
        let (_, input) = read_u32(self.input)?;
627
        self.input = input;
628

629
        visitor.visit_none()
630
    }
631

632
    #[inline]
633
    fn is_human_readable(&self) -> bool {
×
634
        false
635
    }
×
636
}
637

638
/// The `Array16SeqAccess` struct provides deserialization for resources of type
639
/// `Array16`.
640
///
641
/// See [`ResourceReprType`] for more details.
642
struct Array16SeqAccess<'de> {
643
    data_16_bit: &'de [u8],
644
    offsets: &'de [u8],
645
    remaining: usize,
646
}
647

648
impl<'de> de::SeqAccess<'de> for Array16SeqAccess<'de> {
649
    type Error = BinaryDeserializerError;
650

651
    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
652
    where
653
        T: de::DeserializeSeed<'de>,
654
    {
655
        if self.remaining == 0 {
656
            return Ok(None);
657
        }
658

659
        // Elements are stored as a sequence of `u16` offsets. Pop one and
660
        // deserialize the corresponding resource.
661
        let (offset, rest) = read_u16(self.offsets)?;
662
        self.offsets = rest;
663
        self.remaining -= 1;
664

665
        let input = get_subslice(
666
            self.data_16_bit,
667
            (offset as usize) * core::mem::size_of::<u16>()..,
668
        )?;
669
        let de = Resource16BitDeserializer::new(input);
670
        seed.deserialize(de).map(Some)
671
    }
672

673
    fn size_hint(&self) -> Option<usize> {
×
674
        Some(self.remaining)
×
675
    }
×
676
}
677

678
/// The `ArraySeqAccess` struct provides deserialization for resources of type
679
/// `Array`.
680
///
681
/// See [`ResourceReprType`] for more details.
682
struct ArraySeqAccess<'a, 'de: 'a> {
683
    de: &'a mut ResourceTreeDeserializer<'de>,
684
    descriptors: &'de [u8],
685
    remaining: usize,
686
}
687

688
impl<'de, 'a> de::SeqAccess<'de> for ArraySeqAccess<'a, 'de> {
689
    type Error = BinaryDeserializerError;
690

691
    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
692
    where
693
        T: de::DeserializeSeed<'de>,
694
    {
695
        if self.remaining == 0 {
696
            return Ok(None);
697
        }
698

699
        // Elements are stored as a sequence of resource descriptors. Pop one
700
        // and deserialize the corresponding resource.
701
        let input = self.descriptors;
702
        self.descriptors = get_subslice(self.descriptors, core::mem::size_of::<u32>()..)?;
703
        self.remaining -= 1;
704

705
        // Input must always start at a resource descriptor. The rest of the
706
        // input is immaterial in this case, as we will return to this function
707
        // for the next descriptor.
708
        self.de.input = input;
709
        seed.deserialize(&mut *self.de).map(Some)
710
    }
711

712
    fn size_hint(&self) -> Option<usize> {
×
713
        Some(self.remaining)
×
714
    }
×
715
}
716

717
/// The `IntVectorSeqAccess` struct provides deserialization for resources of
718
/// type `IntVector`.
719
///
720
/// See [`ResourceReprType`] for more details.
721
struct IntVectorSeqAccess<'de> {
722
    values: &'de [u8],
723
    remaining: usize,
724
}
725

726
impl<'de> de::SeqAccess<'de> for IntVectorSeqAccess<'de> {
727
    type Error = BinaryDeserializerError;
728

729
    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
730
    where
731
        T: de::DeserializeSeed<'de>,
732
    {
733
        if self.remaining == 0 {
734
            return Ok(None);
735
        }
736

737
        // Elements are stored as a sequence of 32-bit integers. Pop one and
738
        // feed it into the specialized int vector deserializer.
739
        let input = self.values;
740
        self.values = get_subslice(self.values, core::mem::size_of::<u32>()..)?;
741
        self.remaining -= 1;
742

743
        let de = IntVectorDeserializer::new(input);
744
        seed.deserialize(de).map(Some)
745
    }
746

747
    fn size_hint(&self) -> Option<usize> {
×
748
        Some(self.remaining)
×
749
    }
×
750
}
751

752
/// The `EmptySeqAccess` struct provides for deserialization of any empty
753
/// array resource, including `IntVector` and string types.
754
struct EmptySeqAccess;
755

756
impl<'de> de::SeqAccess<'de> for EmptySeqAccess {
757
    type Error = BinaryDeserializerError;
758

759
    fn next_element_seed<T>(&mut self, _seed: T) -> Result<Option<T::Value>, Self::Error>
760
    where
761
        T: de::DeserializeSeed<'de>,
762
    {
763
        Ok(None)
764
    }
765

766
    fn size_hint(&self) -> Option<usize> {
×
767
        Some(0)
×
768
    }
×
769
}
770

771
/// The `EmptyMapAccess` struct provides for deserialization of any empty
772
/// table resource.
773
struct EmptyMapAccess;
774

775
impl<'de> de::MapAccess<'de> for EmptyMapAccess {
776
    type Error = BinaryDeserializerError;
777

778
    fn next_key_seed<K>(&mut self, _seed: K) -> Result<Option<K::Value>, Self::Error>
779
    where
780
        K: de::DeserializeSeed<'de>,
781
    {
782
        Ok(None)
783
    }
784

785
    #[allow(clippy::panic)]
786
    fn next_value_seed<V>(&mut self, _seed: V) -> Result<V::Value, Self::Error>
787
    where
788
        V: de::DeserializeSeed<'de>,
789
    {
790
        // We should never reach this code unless serde has violated the
791
        // invariant of never calling `next_value_seed()` if `next_key_seed()`
792
        // returns `None`. We have no reasonable value to return here, so our
793
        // only choice is to panic.
794
        panic!("Unable to process value for empty map. This is likely a `serde` bug.");
795
    }
796

797
    fn size_hint(&self) -> Option<usize> {
×
798
        Some(0)
×
799
    }
×
800
}
801

802
/// The `TableMapAccess` struct provides deserialization for resources of type
803
/// `Table`.
804
///
805
/// See [`ResourceReprType`] for more details.
806
struct TableMapAccess<'de, 'a> {
807
    de: &'a mut ResourceTreeDeserializer<'de>,
808
    keys: &'de [u8],
809
    values: &'de [u8],
810
    remaining: usize,
811
}
812

813
impl<'de, 'a> de::MapAccess<'de> for TableMapAccess<'de, 'a> {
814
    type Error = BinaryDeserializerError;
815

816
    fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>, Self::Error>
817
    where
818
        K: de::DeserializeSeed<'de>,
819
    {
820
        if self.remaining == 0 {
821
            return Ok(None);
822
        }
823

824
        // Keys are stored as a sequence of byte offsets into the key block. Pop
825
        // one and feed it into the specialized key deserializer.
826
        let (key, keys) = read_u16(self.keys)?;
827
        self.keys = keys;
828
        self.remaining -= 1;
829

830
        let input = get_subslice(self.de.keys, key as usize..).or(Err(
831
            BinaryDeserializerError::invalid_data("unexpected end of data while deserializing key"),
832
        ))?;
833

834
        let de = KeyDeserializer::new(input);
835
        seed.deserialize(de).map(Some)
836
    }
837

838
    fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value, Self::Error>
839
    where
840
        V: de::DeserializeSeed<'de>,
841
    {
842
        // Values are stored as a sequence of resource descriptors. Pop one and
843
        // deserialize the corresponding resource.
844
        let value = self.values;
845
        self.values = get_subslice(self.values, core::mem::size_of::<u32>()..)?;
846

847
        self.de.input = value;
848
        seed.deserialize(&mut *self.de)
849
    }
850

851
    fn size_hint(&self) -> Option<usize> {
×
852
        Some(self.remaining)
×
853
    }
×
854
}
855

856
/// Deserializer for resources stored in the 16-bit data block of the resource
/// bundle.
///
/// A resource lives in the 16-bit data block if and only if it is a
/// `StringV2`.
pub struct Resource16BitDeserializer<'de> {
    /// Slice beginning at the first 16-bit unit of the string's serialized
    /// form (either a length marker or the first code unit).
    input: &'de [u8],
}
862

863
impl<'de> Resource16BitDeserializer<'de> {
864
    fn new(input: &'de [u8]) -> Self {
×
865
        Self { input }
×
866
    }
×
867

868
    /// Reads a UTF-16 string from the 16-bit data block.
869
    fn read_string_v2(self) -> Result<String, BinaryDeserializerError> {
×
870
        let (length, input) = get_length_and_start_of_utf16_string(self.input)?;
×
871

872
        let byte_slices = input.chunks_exact(2).take(length);
×
873
        if byte_slices.len() != length {
×
874
            // `take()` will silently return fewer elements than requested if
875
            // the input is too short, but that's an error during deserialize.
876
            return Err(BinaryDeserializerError::invalid_data(
×
877
                "unexpected end of input while reading string",
878
            ));
879
        }
880

881
        let units = byte_slices.map(|bytes| {
×
882
            // We can safely unwrap as we guarantee above that this chunk is
883
            // exactly 2 bytes.
884
            #[allow(clippy::unwrap_used)]
885
            let bytes = <[u8; 2]>::try_from(bytes).unwrap();
×
886
            u16::from_le_bytes(bytes)
×
887
        });
×
888

889
        char::decode_utf16(units)
×
890
            .collect::<Result<String, _>>()
891
            .map_err(|_| {
×
892
                BinaryDeserializerError::invalid_data("string resource is not valid UTF-16")
×
893
            })
×
894
    }
×
895
}
896

897
impl<'de> de::Deserializer<'de> for Resource16BitDeserializer<'de> {
898
    type Error = BinaryDeserializerError;
899

900
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
901
    where
902
        V: de::Visitor<'de>,
903
    {
904
        // The only type which can be wholly represented in the 16-bit data
905
        // block is `StringV2`.
906
        self.deserialize_string(visitor)
907
    }
908

909
    forward_to_deserialize_any! {
910
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str
911
        byte_buf option unit unit_struct newtype_struct seq tuple
912
        tuple_struct map struct enum identifier ignored_any
913
    }
914

915
    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
916
    where
917
        V: de::Visitor<'de>,
918
    {
919
        // Because `StringV2` is stored as UTF-16, we can't borrow it as a
920
        // `&str`. If zero-copy is desired, an appropriate data structure such
921
        // as `ZeroVec` will be needed.
922
        visitor.visit_string(self.read_string_v2()?)
923
    }
924

925
    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
926
    where
927
        V: de::Visitor<'de>,
928
    {
929
        // `StringV2` is a contiguous sequence of native-endian `u16`, so we can
930
        // zero-copy deserialize it if the visitor supports it.
931
        let (length, input) = get_length_and_start_of_utf16_string(self.input)?;
932
        let bytes = get_subslice(input, 0..length * core::mem::size_of::<u16>())?;
933

934
        visitor.visit_borrowed_bytes(bytes)
935
    }
936

937
    fn is_human_readable(&self) -> bool {
×
938
        false
939
    }
×
940
}
941

942
/// Deserializer for a single element of an `IntVector` resource, whose
/// contents are 32-bit integers.
pub struct IntVectorDeserializer<'de> {
    /// Slice beginning at the four bytes of the integer to read.
    input: &'de [u8],
}
947

948
impl<'de> IntVectorDeserializer<'de> {
949
    fn new(input: &'de [u8]) -> Self {
×
950
        Self { input }
×
951
    }
×
952

953
    /// Reads a 32-bit integer from the current input as signed.
954
    fn read_signed(mut self) -> Result<i32, BinaryDeserializerError> {
×
955
        let (value, next) = read_u32(self.input)?;
×
956
        self.input = next;
×
957

958
        Ok(value as i32)
×
959
    }
×
960

961
    /// Reads a 32-bit integer from the current input as unsigned.
962
    fn read_unsigned(mut self) -> Result<u32, BinaryDeserializerError> {
×
963
        let (value, next) = read_u32(self.input)?;
×
964
        self.input = next;
×
965

966
        Ok(value)
×
967
    }
×
968
}
969

970
impl<'de> de::Deserializer<'de> for IntVectorDeserializer<'de> {
971
    type Error = BinaryDeserializerError;
972

973
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
974
    where
975
        V: de::Visitor<'de>,
976
    {
977
        // The contents of `IntVector`s are always 32-bit integers. We can
978
        // safely generalize to `u32`, as there's no special handling needed
979
        // for signed integers.
980
        self.deserialize_u32(visitor)
981
    }
982

983
    forward_to_deserialize_any! {
984
        bool f32 f64 char str string
985
        bytes byte_buf option unit unit_struct newtype_struct seq tuple
986
        tuple_struct map struct enum identifier ignored_any
987
    }
988

989
    fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value, Self::Error>
990
    where
991
        V: de::Visitor<'de>,
992
    {
993
        self.deserialize_i32(visitor)
994
    }
995

996
    fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value, Self::Error>
997
    where
998
        V: de::Visitor<'de>,
999
    {
1000
        self.deserialize_i32(visitor)
1001
    }
1002

1003
    fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1004
    where
1005
        V: de::Visitor<'de>,
1006
    {
1007
        visitor.visit_i32(self.read_signed()?)
1008
    }
1009

1010
    fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1011
    where
1012
        V: de::Visitor<'de>,
1013
    {
1014
        visitor.visit_i64(self.read_signed()? as i64)
1015
    }
1016

1017
    fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1018
    where
1019
        V: de::Visitor<'de>,
1020
    {
1021
        self.deserialize_u32(visitor)
1022
    }
1023

1024
    fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1025
    where
1026
        V: de::Visitor<'de>,
1027
    {
1028
        self.deserialize_u32(visitor)
1029
    }
1030

1031
    fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1032
    where
1033
        V: de::Visitor<'de>,
1034
    {
1035
        visitor.visit_u32(self.read_unsigned()?)
1036
    }
1037

1038
    fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1039
    where
1040
        V: de::Visitor<'de>,
1041
    {
1042
        visitor.visit_u64(self.read_unsigned()? as u64)
1043
    }
1044
}
1045

1046
/// Deserializer for a table key, which is stored as a null-terminated UTF-8
/// string.
pub struct KeyDeserializer<'de> {
    /// Slice beginning at the first byte of the key.
    input: &'de [u8],
}
1049

1050
impl<'de> KeyDeserializer<'de> {
1051
    fn new(input: &'de [u8]) -> Self {
×
1052
        Self { input }
×
1053
    }
×
1054

1055
    /// Reads a key from the current input.
1056
    fn read_key(self) -> Result<&'de str, BinaryDeserializerError> {
×
1057
        // Keys are stored as null-terminated UTF-8 strings. Locate the
1058
        // terminating byte and return as a borrowed string.
1059
        let terminator_pos = self.input.iter().position(|&byte| byte == 0).ok_or(
×
1060
            BinaryDeserializerError::invalid_data("unterminated key string"),
×
1061
        )?;
×
1062

1063
        let input = get_subslice(self.input, 0..terminator_pos)?;
×
1064
        core::str::from_utf8(input)
×
1065
            .map_err(|_| BinaryDeserializerError::invalid_data("key string is not valid UTF-8"))
×
1066
    }
×
1067
}
1068

1069
impl<'de> de::Deserializer<'de> for KeyDeserializer<'de> {
1070
    type Error = BinaryDeserializerError;
1071

1072
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1073
    where
1074
        V: de::Visitor<'de>,
1075
    {
1076
        self.deserialize_str(visitor)
1077
    }
1078

1079
    forward_to_deserialize_any! {
1080
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char string
1081
        bytes byte_buf option unit unit_struct newtype_struct seq tuple
1082
        tuple_struct map struct enum identifier ignored_any
1083
    }
1084

1085
    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
1086
    where
1087
        V: de::Visitor<'de>,
1088
    {
1089
        visitor.visit_borrowed_str(self.read_key()?)
1090
    }
1091
}
1092

1093
/// Determines the length in units of a serialized UTF-16 string.
1094
///
1095
/// Returns the length of the string and a slice beginning at the first unit.
1096
fn get_length_and_start_of_utf16_string(
×
1097
    input: &[u8],
1098
) -> Result<(usize, &[u8]), BinaryDeserializerError> {
1099
    let (first, rest) = read_u16(input)?;
×
1100

1101
    let (length, rest) = if (0xdc00..0xdfef).contains(&first) {
×
1102
        // The unit is the entire length marker.
1103
        ((first & 0x03ff) as usize, rest)
×
1104
    } else if (0xdfef..0xdfff).contains(&first) {
×
1105
        // The unit is the first of a 2-unit length marker.
1106
        let (second, rest) = read_u16(rest)?;
×
1107

1108
        (((first as usize - 0xdfef) << 16) | second as usize, rest)
×
1109
    } else if first == 0xdfff {
×
1110
        // The unit is the first of a 3-unit length marker.
1111
        let (second, rest) = read_u16(rest)?;
×
1112
        let (third, rest) = read_u16(rest)?;
×
1113

1114
        (((second as usize) << 16) | third as usize, rest)
×
1115
    } else {
1116
        // The string has implicit length. These are strings of at least 1
1117
        // and at most 40 units.
1118
        let length = rest
×
1119
            .chunks_exact(2)
1120
            .take(40)
1121
            .position(|chunk| chunk == [0, 0])
×
1122
            .ok_or(BinaryDeserializerError::invalid_data(
×
1123
                "unterminated string with implicit length",
1124
            ))?
×
1125
            + 1;
1126

1127
        (length, input)
×
1128
    };
1129

1130
    Ok((length, rest))
×
1131
}
×
1132

1133
// Empty impl: no trait methods are overridden, so the trait's defaults apply.
impl de::StdError for BinaryDeserializerError {}
1134

1135
impl de::Error for BinaryDeserializerError {
1136
    fn custom<T>(_msg: T) -> Self
×
1137
    where
1138
        T: fmt::Display,
1139
    {
1140
        #[cfg(feature = "logging")]
1141
        log::warn!("Error during resource bundle deserialization: {_msg}");
×
1142

1143
        BinaryDeserializerError::unknown("error during deserialization; see logs")
×
1144
    }
×
1145
}
1146

1147
impl TryFrom<&[u8]> for BinIndex {
1148
    type Error = BinaryDeserializerError;
1149

UNCOV
1150
    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
×
UNCOV
1151
        let (field_count, value) = read_u32(value)?;
×
1152

1153
        if field_count < 5 {
×
1154
            // We shouldn't call into this function in format version 1.0, as
1155
            // there is no index count field, and format version 1.1 and up all
1156
            // specify at least five fields.
UNCOV
1157
            return Err(BinaryDeserializerError::invalid_data(
×
1158
                "invalid index field count",
1159
            ));
1160
        }
1161

UNCOV
1162
        let (keys_end, value) = read_u32(value)?;
×
UNCOV
1163
        let (resources_end, value) = read_u32(value)?;
×
1164
        let (bundle_end, value) = read_u32(value)?;
×
1165
        let (largest_table_entry_count, value) = read_u32(value)?;
×
1166

1167
        // The following fields may or may not be present, depending on format
1168
        // version and whether or not the file is or uses a pool bundle.
UNCOV
1169
        let (bundle_attributes, data_16_bit_end, pool_checksum) = if field_count >= 6 {
×
UNCOV
1170
            let (bundle_attributes, value) = read_u32(value)?;
×
1171

UNCOV
1172
            let (data_16_bit_end, pool_checksum) = if field_count >= 7 {
×
UNCOV
1173
                let (data_16_bit_end, value) = read_u32(value)?;
×
1174

UNCOV
1175
                let pool_checksum = if field_count >= 8 {
×
1176
                    let (pool_checksum, _) = read_u32(value)?;
×
1177

UNCOV
1178
                    Some(pool_checksum)
×
1179
                } else {
UNCOV
1180
                    None
×
1181
                };
1182

UNCOV
1183
                (Some(data_16_bit_end), pool_checksum)
×
1184
            } else {
1185
                (None, None)
×
1186
            };
1187

UNCOV
1188
            (Some(bundle_attributes), data_16_bit_end, pool_checksum)
×
1189
        } else {
UNCOV
1190
            (None, None, None)
×
1191
        };
1192

UNCOV
1193
        Ok(Self {
×
1194
            field_count,
1195
            keys_end,
1196
            resources_end,
1197
            bundle_end,
1198
            largest_table_entry_count,
1199
            bundle_attributes,
1200
            data_16_bit_end,
1201
            pool_checksum,
1202
        })
1203
    }
×
1204
}
1205

1206
impl TryFrom<&[u8]> for FormatVersion {
1207
    type Error = BinaryDeserializerError;
1208

1209
    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
×
1210
        let value = match value {
×
1211
            [1, 0, 0, 0] => FormatVersion::V1_0,
×
UNCOV
1212
            [1, 1, 0, 0] => FormatVersion::V1_1,
×
UNCOV
1213
            [1, 2, 0, 0] => FormatVersion::V1_2,
×
UNCOV
1214
            [1, 3, 0, 0] => FormatVersion::V1_3,
×
UNCOV
1215
            [2, 0, 0, 0] => FormatVersion::V2_0,
×
1216
            [3, 0, 0, 0] => FormatVersion::V3_0,
×
1217
            _ => {
1218
                return Err(BinaryDeserializerError::invalid_data(
×
1219
                    "unrecognized format version",
1220
                ))
1221
            }
1222
        };
1223

1224
        Ok(value)
×
UNCOV
1225
    }
×
1226
}
1227

1228
impl TryFrom<u32> for ResDescriptor {
1229
    type Error = BinaryDeserializerError;
1230

UNCOV
1231
    fn try_from(value: u32) -> Result<Self, Self::Error> {
×
1232
        let resource_type = ResourceReprType::try_from((value >> 28) as u16)?;
×
1233

UNCOV
1234
        Ok(Self::new(resource_type, value & MASK_28_BIT))
×
1235
    }
×
1236
}
1237

1238
/// Reads the first four bytes of the input and interprets them as a `u32` with
1239
/// native endianness.
1240
///
1241
/// Returns the `u32` and a slice containing all input after the interpreted
1242
/// bytes. Returns an error if the input is of insufficient length.
1243
fn read_u32(input: &[u8]) -> Result<(u32, &[u8]), BinaryDeserializerError> {
×
1244
    // Safe to unwrap at the end of this because `try_into()` for arrays will
1245
    // only fail if the slice is the wrong size.
1246
    #[allow(clippy::unwrap_used)]
UNCOV
1247
    let bytes = input
×
UNCOV
1248
        .get(0..core::mem::size_of::<u32>())
×
1249
        .ok_or(BinaryDeserializerError::invalid_data(
×
1250
            "unexpected end of input",
UNCOV
1251
        ))?
×
1252
        .try_into()
1253
        .unwrap();
UNCOV
1254
    let value = u32::from_le_bytes(bytes);
×
1255

1256
    let rest =
UNCOV
1257
        input
×
UNCOV
1258
            .get(core::mem::size_of::<u32>()..)
×
UNCOV
1259
            .ok_or(BinaryDeserializerError::invalid_data(
×
1260
                "unexpected end of input",
1261
            ))?;
×
1262
    Ok((value, rest))
×
1263
}
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc