• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zbraniecki / icu4x / 13958601093

19 Mar 2025 04:17PM UTC coverage: 74.164% (-1.5%) from 75.71%
13958601093

push

github

web-flow
Clean up properties docs (#6315)

58056 of 78281 relevant lines covered (74.16%)

819371.32 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

60.74
/provider/source/src/source.rs
1
// This file is part of ICU4X. For terms of use, please see the file
2
// called LICENSE at the top level of the ICU4X source tree
3
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4

5
use elsa::sync::FrozenMap;
6
use icu_locale_core::subtags::Region;
7
use icu_provider::prelude::*;
8
use std::any::Any;
9
use std::collections::BTreeMap;
10
use std::collections::HashSet;
11
use std::fmt::Debug;
12
#[cfg(feature = "networking")]
13
use std::fs::File;
14
#[cfg(feature = "networking")]
15
use std::io::BufWriter;
16
use std::io::Cursor;
17
use std::io::Read;
18
use std::path::Path;
19
use std::path::PathBuf;
20
use std::sync::OnceLock;
21
use std::sync::RwLock;
22
use zip::ZipArchive;
23

24
pub(crate) struct SerdeCache {
25
    pub(crate) root: AbstractFs,
26
    cache: FrozenMap<String, Box<dyn Any + Send + Sync>>,
27
}
28

29
impl Debug for SerdeCache {
30
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
×
31
        f.debug_struct("SerdeCache")
×
32
            .field("root", &self.root)
33
            // skip formatting the cache
34
            .finish()
35
    }
×
36
}
37

38
impl SerdeCache {
39
    pub fn new(root: AbstractFs) -> Self {
10✔
40
        Self {
10✔
41
            root,
10✔
42
            cache: FrozenMap::new(),
10✔
43
        }
×
44
    }
10✔
45

46
    fn read_and_parse<S>(
20,495✔
47
        &self,
48
        path: &str,
49
        parser: fn(&[u8]) -> Result<S, DataError>,
50
    ) -> Result<&S, DataError>
51
    where
52
        for<'de> S: serde::Deserialize<'de> + 'static + Send + Sync,
53
    {
54
        match self.cache.get(path) {
40,990✔
55
            Some(x) => x,
19,746✔
56
            None => self.cache.insert(
1,498✔
57
                path.to_string(),
749✔
58
                Box::new(
749✔
59
                    parser(&self.root.read_to_buf(path)?)
749✔
60
                        .map_err(|e| e.with_path_context(std::path::Path::new(path)))?,
×
61
                ),
62
            ),
749✔
63
        }
64
        .downcast_ref::<S>()
65
        .ok_or_else(|| DataError::custom("Cache error").with_type_context::<S>())
×
66
    }
20,495✔
67

68
    pub fn read_and_parse_json<S>(&self, path: &str) -> Result<&S, DataError>
8,750✔
69
    where
70
        for<'de> S: serde::Deserialize<'de> + 'static + Send + Sync,
71
    {
72
        self.read_and_parse(path, |bytes| {
9,326✔
73
            serde_json::from_slice(bytes)
576✔
74
                .map_err(|e| DataError::custom("JSON deserialize").with_display_context(&e))
×
75
        })
576✔
76
    }
8,750✔
77

78
    pub fn read_and_parse_toml<S>(&self, path: &str) -> Result<&S, DataError>
11,751✔
79
    where
80
        for<'de> S: serde::Deserialize<'de> + 'static + Send + Sync,
81
    {
82
        self.read_and_parse(path, |bytes| {
11,924✔
83
            toml::from_str(
173✔
84
                std::str::from_utf8(bytes)
173✔
85
                    .map_err(|e| DataError::custom("TOML UTF8").with_display_context(&e))?,
×
86
            )
87
            .map_err(|e| DataError::custom("TOML deserialize").with_display_context(&e))
×
88
        })
173✔
89
    }
11,751✔
90

91
    pub fn list(&self, path: &str) -> Result<impl Iterator<Item = String>, DataError> {
114✔
92
        self.root.list(path)
114✔
93
    }
114✔
94

95
    pub fn file_exists(&self, path: &str) -> Result<bool, DataError> {
8,314✔
96
        self.root.file_exists(path)
8,314✔
97
    }
8,314✔
98
}
99

100
pub(crate) struct ZipData {
101
    archive: ZipArchive<Cursor<Vec<u8>>>,
102
    file_list: HashSet<String>,
103
}
104

105
/// A decompressed tar archive held in memory together with the set of entry paths it contains.
pub(crate) struct TarArchive {
    /// The raw bytes of the tar stream (gzip layer already removed).
    archive: Vec<u8>,
    /// Paths of the entries in `archive` that could be represented as UTF-8 strings.
    file_list: HashSet<String>,
}
109

110
pub(crate) enum AbstractFs {
111
    Fs(PathBuf),
112
    Zip(RwLock<Result<ZipData, String>>),
113
    Tar(RwLock<Result<TarArchive, String>>),
114
    Memory(BTreeMap<&'static str, &'static [u8]>),
115
}
116

117
impl Debug for AbstractFs {
118
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
×
119
        f.debug_struct("AbstractFs").finish()
×
120
    }
×
121
}
122

123
impl AbstractFs {
124
    pub fn new(root: &Path) -> Result<Self, DataError> {
×
125
        if std::fs::metadata(root)
×
126
            .map_err(|e| DataError::from(e).with_path_context(root))?
×
127
            .is_dir()
128
        {
129
            Ok(Self::Fs(root.to_path_buf()))
×
130
        } else if root.extension().is_some_and(|ext| ext == "zip") {
×
131
            let archive = ZipArchive::new(Cursor::new(std::fs::read(root)?)).map_err(|e| {
×
132
                DataError::custom("Invalid ZIP file")
×
133
                    .with_display_context(&e)
134
                    .with_path_context(root)
×
135
            })?;
×
136
            let file_list = archive.file_names().map(String::from).collect();
×
137
            Ok(Self::Zip(RwLock::new(Ok(ZipData { archive, file_list }))))
×
138
        } else if root.ends_with(".tar.gz") {
×
139
            use std::io::Read;
140
            let mut data = Vec::new();
×
141
            flate2::read::GzDecoder::new(Cursor::new(std::fs::read(root)?))
×
142
                .read_to_end(&mut data)?;
×
143

144
            let file_list = tar::Archive::new(Cursor::new(&data))
×
145
                .entries_with_seek()
146
                .map(|e| {
×
147
                    e.into_iter().filter_map(|e| {
×
148
                        Some(e.ok()?.path().ok()?.as_os_str().to_str()?.to_string())
×
149
                    })
×
150
                })?
×
151
                .collect::<HashSet<_>>();
×
152

153
            Ok(Self::Tar(RwLock::new(Ok(TarArchive {
×
154
                archive: data,
×
155
                file_list,
×
156
            }))))
157
        } else {
×
158
            Err(DataError::custom("unsupported archive type").with_display_context(&root.display()))
×
159
        }
160
    }
×
161

162
    /// Creates a source for a remote archive without fetching it yet.
    ///
    /// A `path` ending in `.zip` is treated as a ZIP archive; anything else is treated as
    /// a gzipped tar archive. The URL is stored and the download happens on first access.
    #[cfg(feature = "networking")]
    pub fn new_from_url(path: String) -> Self {
        if !path.ends_with(".zip") {
            return Self::Tar(RwLock::new(Err(path)));
        }
        Self::Zip(RwLock::new(Err(path)))
    }
8✔
170

171
    /// Makes sure a lazily-fetched archive is loaded before it is accessed.
    ///
    /// With the `networking` feature, a `Zip` or `Tar` source that still holds a URL is
    /// downloaded (or taken from the on-disk download cache), indexed, and stored in
    /// place of the URL. In every other case this does nothing.
    ///
    /// # Errors
    ///
    /// Fails if the download, the cache directory, or the archive decoding fails.
    ///
    /// # Panics
    ///
    /// Panics if the lock guarding the archive is poisoned.
    fn init(&self) -> Result<(), DataError> {
        /// Returns the local path of `resource`, downloading it first if it is not cached.
        ///
        /// The cache directory is `$ICU4X_SOURCE_CACHE`, or `icu4x-source-cache/` inside
        /// the system temp directory when the variable is not set.
        #[cfg(feature = "networking")]
        fn download(resource: &str) -> Result<PathBuf, DataError> {
            use std::io::Write;

            let root = std::env::var_os("ICU4X_SOURCE_CACHE")
                .map(PathBuf::from)
                .unwrap_or_else(|| std::env::temp_dir().join("icu4x-source-cache/"))
                // Everything after the last `//` (i.e. the URL without its scheme).
                .join(resource.rsplit("//").next().unwrap());
            if !root.exists() {
                log::info!("Downloading {resource}");
                std::fs::create_dir_all(root.parent().unwrap())?;
                let mut retry = 5;
                let mut response = loop {
                    match ureq::get(resource).call() {
                        Ok(r) => break r.into_body().into_reader(),
                        Err(e) if retry > 0 => {
                            log::warn!("Download error {e:?}, retrying...");
                            std::thread::sleep(std::time::Duration::from_secs(2));
                            retry -= 1;
                        }
                        Err(e) => {
                            return Err(DataError::custom("Download").with_display_context(&e))
                        }
                    }
                };

                // Write to a side file and move it into place only once it is complete,
                // so an interrupted download is never mistaken for a valid cache entry.
                let mut partial = root.clone().into_os_string();
                partial.push(".part");
                let partial = PathBuf::from(partial);
                let mut writer = BufWriter::new(File::create(&partial)?);
                std::io::copy(&mut response, &mut writer)?;
                // Dropping a `BufWriter` discards flush errors; surface them instead.
                writer.flush()?;
                drop(writer);
                std::fs::rename(&partial, &root)?;
            }
            Ok(root)
        }

        #[cfg(feature = "networking")]
        if let Self::Zip(lock) = self {
            if lock.read().expect("poison").is_ok() {
                return Ok(());
            }
            let mut lock = lock.write().expect("poison");
            // Another thread may have loaded the archive while we waited for the lock.
            let Err(resource) = &*lock else {
                return Ok(());
            };

            let root = download(resource)?;

            let archive = ZipArchive::new(Cursor::new(std::fs::read(&root)?)).map_err(|e| {
                DataError::custom("Invalid ZIP file")
                    .with_display_context(&e)
                    .with_path_context(&root)
            })?;

            let file_list = archive.file_names().map(String::from).collect();

            *lock = Ok(ZipData { archive, file_list });
        } else if let Self::Tar(lock) = self {
            if lock.read().expect("poison").is_ok() {
                return Ok(());
            }
            let mut lock = lock.write().expect("poison");
            // Another thread may have loaded the archive while we waited for the lock.
            let Err(resource) = &*lock else {
                return Ok(());
            };

            use std::io::Read;
            let mut data = Vec::new();
            flate2::read::GzDecoder::new(Cursor::new(std::fs::read(&download(resource)?)?))
                .read_to_end(&mut data)?;

            // Entries that fail to decode or whose path is not valid UTF-8 are left out
            // of the index.
            let file_list = tar::Archive::new(Cursor::new(&data))
                .entries_with_seek()
                .map(|entries| {
                    entries.into_iter().filter_map(|entry| {
                        Some(entry.ok()?.path().ok()?.as_os_str().to_str()?.to_string())
                    })
                })?
                .collect::<HashSet<_>>();

            *lock = Ok(TarArchive {
                archive: data,
                file_list,
            })
        }
        Ok(())
    }
9,194✔
255

256
    fn read_to_buf(&self, path: &str) -> Result<Vec<u8>, DataError> {
766✔
257
        self.init()?;
766✔
258
        match self {
766✔
259
            Self::Fs(root) => {
×
260
                log::debug!("Reading: {}/{}", root.display(), path);
×
261
                std::fs::read(root.join(path))
×
262
                    .map_err(|e| DataError::from(e).with_path_context(&root.join(path)))
×
263
            }
264
            Self::Zip(zip) => {
103✔
265
                log::debug!("Reading: <zip>/{}", path);
103✔
266
                let mut buf = Vec::new();
103✔
267
                zip.write()
103✔
268
                    .expect("poison")
269
                    .as_mut()
270
                    .ok()
271
                    .unwrap() // init called
272
                    .archive
273
                    .by_name(path)
274
                    .map_err(|e| {
103✔
275
                        DataErrorKind::Io(std::io::ErrorKind::NotFound)
×
276
                            .into_error()
277
                            .with_display_context(&e)
278
                            .with_display_context(path)
×
279
                    })?
×
280
                    .read_to_end(&mut buf)?;
103✔
281
                Ok(buf)
103✔
282
            }
103✔
283
            Self::Tar(tar) => {
×
284
                log::debug!("Reading: <tar>/{}", path);
×
285
                tar::Archive::new(Cursor::new(
×
286
                    &tar.read().expect("poison").as_ref().unwrap().archive,
×
287
                )) // init called
288
                .entries_with_seek()
289
                .and_then(|e| {
×
290
                    for e in e {
×
291
                        let e = e?;
×
292
                        if e.path()?.as_os_str() == path {
×
293
                            return e.bytes().collect::<Result<Vec<_>, std::io::Error>>();
×
294
                        }
295
                    }
×
296
                    Err(std::io::ErrorKind::NotFound.into())
×
297
                })
×
298
                .map_err(|e| {
×
299
                    DataErrorKind::Io(e.kind())
×
300
                        .into_error()
301
                        .with_display_context(&e)
302
                        .with_display_context(path)
×
303
                })
×
304
            }
×
305
            Self::Memory(map) => map.get(path).copied().map(Vec::from).ok_or_else(|| {
663✔
306
                DataError::custom("Not found in icu4x-datagen's data/").with_display_context(path)
×
307
            }),
×
308
        }
309
    }
766✔
310

311
    #[allow(dead_code)]
312
    pub(crate) fn read_to_string(&self, path: &str) -> Result<String, DataError> {
17✔
313
        let vec = self.read_to_buf(path)?;
17✔
314
        let s = String::from_utf8(vec)
17✔
315
            .map_err(|e| DataError::custom("Invalid UTF-8").with_display_context(&e))?;
×
316
        Ok(s)
17✔
317
    }
17✔
318

319
    fn list(&self, path: &str) -> Result<impl Iterator<Item = String>, DataError> {
114✔
320
        self.init()?;
114✔
321
        Ok(match self {
228✔
322
            Self::Fs(root) => std::fs::read_dir(root.join(path))
×
323
                .map_err(|e| DataError::from(e).with_display_context(path))?
×
324
                .map(|e| -> Result<_, DataError> { Ok(e?.file_name().into_string().unwrap()) })
×
325
                .collect::<Result<HashSet<_>, DataError>>()
326
                .map(HashSet::into_iter)?,
×
327
            Self::Zip(zip) => zip
1✔
328
                .read()
329
                .expect("poison")
330
                .as_ref()
331
                .ok()
332
                .unwrap() // init called
333
                .file_list
334
                .iter()
335
                .filter_map(|p| p.strip_prefix(path))
41,481✔
336
                .filter_map(|suffix| suffix.split('/').find(|s| !s.is_empty()))
18,555✔
337
                .map(String::from)
338
                .collect::<HashSet<_>>()
339
                .into_iter(),
1✔
340
            Self::Tar(tar) => tar
×
341
                .read()
342
                .expect("poison")
343
                .as_ref()
344
                .ok()
345
                .unwrap() // init called
346
                .file_list
347
                .iter()
348
                .filter_map(|p| p.strip_prefix(path))
×
349
                .filter_map(|suffix| suffix.split('/').find(|s| !s.is_empty()))
×
350
                .map(String::from)
351
                .collect::<HashSet<_>>()
352
                .into_iter(),
×
353
            Self::Memory(map) => map
113✔
354
                .keys()
355
                .copied()
356
                .filter_map(|p| p.strip_prefix(path))
54,773✔
357
                .filter_map(|suffix| suffix.split('/').find(|s| !s.is_empty()))
16,006✔
358
                .map(String::from)
359
                .collect::<HashSet<_>>()
360
                .into_iter(),
361
        })
362
    }
114✔
363

364
    fn file_exists(&self, path: &str) -> Result<bool, DataError> {
8,313✔
365
        self.init()?;
8,313✔
366
        Ok(match self {
16,626✔
367
            Self::Fs(root) => root.join(path).is_file(),
×
368
            Self::Zip(zip) => zip
98✔
369
                .read()
370
                .expect("poison")
371
                .as_ref()
372
                .ok()
373
                .unwrap() // init called
374
                .file_list
375
                .contains(path),
98✔
376
            Self::Tar(tar) => tar
×
377
                .read()
378
                .expect("poison")
379
                .as_ref()
380
                .ok()
381
                .unwrap() // init called
382
                .file_list
383
                .contains(path),
×
384
            Self::Memory(map) => map.contains_key(path),
8,215✔
385
        })
386
    }
8,313✔
387
}
388

389
#[derive(Debug)]
×
390
pub(crate) struct TzdbCache {
391
    pub(crate) root: AbstractFs,
392
    pub(crate) transitions: OnceLock<Result<parse_zoneinfo::table::Table, DataError>>,
×
393
    pub(crate) zone_tab: OnceLock<Result<BTreeMap<String, Region>, DataError>>,
×
394
}
395

396
/// Cuts `line` off at its first `#`, dropping the `#` and everything after it.
///
/// A line without `#` is returned unchanged.
fn strip_comments(mut line: String) -> String {
    let keep = line.find('#').unwrap_or(line.len());
    line.truncate(keep);
    line
}
447✔
402

403
impl TzdbCache {
404
    pub(crate) fn zone_tab(&self) -> Result<&BTreeMap<String, Region>, DataError> {
1✔
405
        self.zone_tab
1✔
406
            .get_or_init(|| {
895✔
407
                let mut r = BTreeMap::new();
895✔
408

409
                for line in self
895✔
410
                    .root
411
                    .read_to_string("zone.tab")?
×
412
                    .lines()
413
                    .map(ToOwned::to_owned)
414
                    .map(strip_comments)
415
                {
416
                    let mut fields = line.split('\t');
447✔
417

418
                    let Some(country_code) = fields.next() else {
447✔
419
                        continue;
420
                    };
421

422
                    let Ok(region) = country_code.parse() else {
447✔
423
                        continue;
424
                    };
425

426
                    let Some(_coords) = fields.next() else {
417✔
427
                        continue;
428
                    };
429

430
                    let Some(iana) = fields.next() else {
417✔
431
                        continue;
432
                    };
433

434
                    r.insert(iana.to_owned(), region);
417✔
435
                }
448✔
436
                Ok(r)
1✔
437
            })
1✔
438
            .as_ref()
439
            .map_err(|&e| e)
×
440
    }
1✔
441

442
    /// Returns the zone/rule table built from the tzdb source files.
    ///
    /// The table is built on the first call and stored; later calls return the stored
    /// result. The source files are read in "rearguard" form: vanguard sections are
    /// dropped, rearguard sections are enabled, and the Morocco rules are rewritten the
    /// way `ziguard.awk` does it.
    ///
    /// # Errors
    ///
    /// Fails if one of the tzdb files cannot be read as UTF-8 text.
    ///
    /// # Panics
    ///
    /// Panics if a line cannot be parsed or added to the table, or if a Morocco rule line
    /// does not have the expected fields.
    pub(crate) fn transitions(&self) -> Result<&parse_zoneinfo::table::Table, DataError> {
        use parse_zoneinfo::line::{Line, LineParser};
        use parse_zoneinfo::table::TableBuilder;

        enum Section {
            Normal,
            Vanguard,
            Rearguard,
        }

        self.transitions
            .get_or_init(|| {
                let tzfiles = [
                    "africa",
                    "antarctica",
                    "asia",
                    "australasia",
                    "backward",
                    "etcetera",
                    "europe",
                    "northamerica",
                    "southamerica",
                ];

                let mut raw_lines = Vec::<String>::new();

                for file in tzfiles {
                    raw_lines.extend(
                        self.root
                            .read_to_string(file)?
                            .lines()
                            .map(ToOwned::to_owned),
                    );
                }

                // Filter in a single pass: removing lines from the middle of the vector
                // one at a time would shift the tail on every removal.
                let mut section = Section::Normal;
                let mut lines = Vec::with_capacity(raw_lines.len());

                for line in raw_lines {
                    match section {
                        Section::Normal => {
                            if line.starts_with("# Vanguard section") {
                                section = Section::Vanguard;
                            } else if !line.starts_with('#') {
                                lines.push(line);
                            }
                        }
                        // Everything in a vanguard section is dropped, including the
                        // marker line that switches to the rearguard section.
                        Section::Vanguard => {
                            if line.starts_with("# Rearguard section") {
                                section = Section::Rearguard;
                            }
                        }
                        Section::Rearguard => {
                            if line.starts_with("# End of rearguard section") {
                                section = Section::Normal;
                            } else if (line.starts_with('#') && !line.starts_with("# "))
                                || !line.contains('#')
                            {
                                // Rearguard lines might start with a # not followed by a space (that's a comment), or
                                // they might not ¯\_(ツ)_/¯.
                                let uncommented = line.strip_prefix('#').map(str::to_owned);
                                lines.push(uncommented.unwrap_or(line));
                            }
                        }
                    }
                }

                // Morocco doesn't have rearguard data in the text file, so we have to replicate the transform from
                // ziguard.awk: https://github.com/eggert/tz/blob/271a5784a59e454b659d85948b5e65c17c11516a/ziguard.awk#L261-L299
                for line in lines.iter_mut() {
                    if line.starts_with("Rule\tMorocco") {
                        let mut parts = line.split('\t').skip(2);
                        let from = parts.next().unwrap();
                        let to = parts.next().unwrap();
                        let _type = parts.next().unwrap();
                        let month = parts.next().unwrap();
                        let _day = parts.next().unwrap();
                        let _time = parts.next().unwrap();
                        let save = parts.next().unwrap();
                        if to == "2018" && month == "Oct" {
                            *line = line.replace("2018", "2017");
                        } else if from.parse::<i32>().unwrap() >= 2019 {
                            if save.trim() == "0" {
                                *line = line.replace("\t0\t", "\t1:00\t");
                            } else {
                                *line = line.replace("\t-1:00\t", "\t0\t");
                            }
                        }
                    }
                    *line = line.replace("1:00\tMorocco\t%z", "0:00\tMorocco\t+00/+01");
                }

                #[allow(deprecated)] // no alternative?!
                let parser = LineParser::new();
                let mut table = TableBuilder::new();

                for line in lines {
                    match parser.parse_str(&line).unwrap() {
                        Line::Zone(zone) => table.add_zone_line(zone).unwrap(),
                        Line::Continuation(cont) => table.add_continuation_line(cont).unwrap(),
                        Line::Rule(rule) => table.add_rule_line(rule).unwrap(),
                        Line::Link(link) => table.add_link_line(link).unwrap(),
                        Line::Space => {}
                    }
                }

                Ok(table.build())
            })
            .as_ref()
            .map_err(|&e| e)
    }
2✔
562
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc